comparison nss/lib/util/utf8.c @ 0:1e5118fa0cb1

This is NSS with a CMake build system. To compile a static NSS library for Windows we've used the Chromium-NSS fork and added a CMake build system to compile it statically for Windows. See README.chromium for chromium changes and README.trustbridge for our modifications.
author Andre Heinecke <andre.heinecke@intevation.de>
date Mon, 28 Jul 2014 10:47:06 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1e5118fa0cb1
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "seccomon.h"
6 #include "secport.h"
7
8 #ifdef TEST_UTF8
9 #include <assert.h>
10 #undef PORT_Assert
11 #define PORT_Assert assert
12 #endif
13
14 /*
15 * From RFC 2044:
16 *
17 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
18 * 0000 0000-0000 007F 0xxxxxxx
19 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
20 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
21 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
22 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
23 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
24 */
25
26 /*
27 * From http://www.imc.org/draft-hoffman-utf16
28 *
29 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000
30 *
31 * U' = yyyyyyyyyyxxxxxxxxxx
32 * W1 = 110110yyyyyyyyyy
33 * W2 = 110111xxxxxxxxxx
34 */
35
36 /*
37 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
38 * character values. If you wish to use this code for working with
39 * host byte order values, define the following:
40 *
41 * #if IS_BIG_ENDIAN
42 * #define L_0 0
43 * #define L_1 1
44 * #define L_2 2
45 * #define L_3 3
46 * #define H_0 0
47 * #define H_1 1
48 * #else / * not everyone has elif * /
49 * #if IS_LITTLE_ENDIAN
50 * #define L_0 3
51 * #define L_1 2
52 * #define L_2 1
53 * #define L_3 0
54 * #define H_0 1
55 * #define H_1 0
56 * #else
57 * #error "PDP and NUXI support deferred"
58 * #endif / * IS_LITTLE_ENDIAN * /
59 * #endif / * IS_BIG_ENDIAN * /
60 */
61
/*
 * Byte offsets used to address one character inside a buffer: L_0..L_3
 * index the four bytes of a UCS-4 value and H_0/H_1 the two bytes of a
 * 16-bit value. With the values below, offset 0 holds the most significant
 * byte (network byte order).
 */
62 #define L_0 0
63 #define L_1 1
64 #define L_2 2
65 #define L_3 3
66 #define H_0 0
67 #define H_1 1
68
/* Failure value returned by sec_port_read_utf8(). */
69 #define BAD_UTF8 ((PRUint32)-1)
70
71 /*
72 * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
73 * of Unicode 4.0.0.
74 *
75 * Parameters:
76 * index - Points to the byte offset in inBuf of character to read. On success,
77 * updated to the offset of the following character.
78 * inBuf - Input buffer, UTF-8 encoded
79 * inbufLen - Length of input buffer, in bytes.
80 *
81 * Returns:
82 * Success - The UCS4 encoded character
83 * Failure - BAD_UTF8
84 */
85 static PRUint32
86 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
87 {
88 PRUint32 result;
89 unsigned int i = *index;
90 int bytes_left;
91 PRUint32 min_value;
92
93 PORT_Assert(i < inBufLen);
94
95 if ( (inBuf[i] & 0x80) == 0x00 ) {
96 result = inBuf[i++];
97 bytes_left = 0;
98 min_value = 0;
99 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
100 result = inBuf[i++] & 0x1F;
101 bytes_left = 1;
102 min_value = 0x80;
103 } else if ( (inBuf[i] & 0xF0) == 0xE0) {
104 result = inBuf[i++] & 0x0F;
105 bytes_left = 2;
106 min_value = 0x800;
107 } else if ( (inBuf[i] & 0xF8) == 0xF0) {
108 result = inBuf[i++] & 0x07;
109 bytes_left = 3;
110 min_value = 0x10000;
111 } else {
112 return BAD_UTF8;
113 }
114
115 while (bytes_left--) {
116 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
117 result = (result << 6) | (inBuf[i++] & 0x3F);
118 }
119
120 /* Check for overlong sequences, surrogates, and outside unicode range */
121 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
122 return BAD_UTF8;
123 }
124
125 *index = i;
126 return result;
127 }
128
129 PRBool
130 sec_port_ucs4_utf8_conversion_function
131 (
132 PRBool toUnicode,
133 unsigned char *inBuf,
134 unsigned int inBufLen,
135 unsigned char *outBuf,
136 unsigned int maxOutBufLen,
137 unsigned int *outBufLen
138 )
139 {
140 PORT_Assert((unsigned int *)NULL != outBufLen);
141
142 if( toUnicode ) {
143 unsigned int i, len = 0;
144
145 for( i = 0; i < inBufLen; ) {
146 if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
147 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
148 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
149 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
150 else return PR_FALSE;
151
152 len += 4;
153 }
154
155 if( len > maxOutBufLen ) {
156 *outBufLen = len;
157 return PR_FALSE;
158 }
159
160 len = 0;
161
162 for( i = 0; i < inBufLen; ) {
163 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
164
165 if (ucs4 == BAD_UTF8) return PR_FALSE;
166
167 outBuf[len+L_0] = 0x00;
168 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
169 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
170 outBuf[len+L_3] = (unsigned char)ucs4;
171
172 len += 4;
173 }
174
175 *outBufLen = len;
176 return PR_TRUE;
177 } else {
178 unsigned int i, len = 0;
179 PORT_Assert((inBufLen % 4) == 0);
180 if ((inBufLen % 4) != 0) {
181 *outBufLen = 0;
182 return PR_FALSE;
183 }
184
185 for( i = 0; i < inBufLen; i += 4 ) {
186 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
187 *outBufLen = 0;
188 return PR_FALSE;
189 } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
190 else if( inBuf[i+L_2] >= 0x08 ) len += 3;
191 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
192 else len += 1;
193 }
194
195 if( len > maxOutBufLen ) {
196 *outBufLen = len;
197 return PR_FALSE;
198 }
199
200 len = 0;
201
202 for( i = 0; i < inBufLen; i += 4 ) {
203 if( inBuf[i+L_1] >= 0x01 ) {
204 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
205 /* 00000000 000abcde fghijklm nopqrstu ->
206 11110abc 10defghi 10jklmno 10pqrstu */
207
208 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
209 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
210 | ((inBuf[i+L_2] & 0xF0) >> 4);
211 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
212 | ((inBuf[i+L_3] & 0xC0) >> 6);
213 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
214
215 len += 4;
216 } else if( inBuf[i+L_2] >= 0x08 ) {
217 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
218 /* 00000000 00000000 abcdefgh ijklmnop ->
219 1110abcd 10efghij 10klmnop */
220
221 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
222 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
223 | ((inBuf[i+L_3] & 0xC0) >> 6);
224 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
225
226 len += 3;
227 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
228 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
229 /* 00000000 00000000 00000abc defghijk ->
230 110abcde 10fghijk */
231
232 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
233 | ((inBuf[i+L_3] & 0xC0) >> 6);
234 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
235
236 len += 2;
237 } else {
238 /* 0000 0000-0000 007F -> 0xxxxxx */
239 /* 00000000 00000000 00000000 0abcdefg ->
240 0abcdefg */
241
242 outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
243
244 len += 1;
245 }
246 }
247
248 *outBufLen = len;
249 return PR_TRUE;
250 }
251 }
252
253 PRBool
254 sec_port_ucs2_utf8_conversion_function
255 (
256 PRBool toUnicode,
257 unsigned char *inBuf,
258 unsigned int inBufLen,
259 unsigned char *outBuf,
260 unsigned int maxOutBufLen,
261 unsigned int *outBufLen
262 )
263 {
264 PORT_Assert((unsigned int *)NULL != outBufLen);
265
266 if( toUnicode ) {
267 unsigned int i, len = 0;
268
269 for( i = 0; i < inBufLen; ) {
270 if( (inBuf[i] & 0x80) == 0x00 ) {
271 i += 1;
272 len += 2;
273 } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
274 i += 2;
275 len += 2;
276 } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
277 i += 3;
278 len += 2;
279 } else if( (inBuf[i] & 0xF8) == 0xF0 ) {
280 i += 4;
281 len += 4;
282 } else return PR_FALSE;
283 }
284
285 if( len > maxOutBufLen ) {
286 *outBufLen = len;
287 return PR_FALSE;
288 }
289
290 len = 0;
291
292 for( i = 0; i < inBufLen; ) {
293 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
294
295 if (ucs4 == BAD_UTF8) return PR_FALSE;
296
297 if( ucs4 < 0x10000) {
298 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
299 outBuf[len+H_1] = (unsigned char)ucs4;
300 len += 2;
301 } else {
302 ucs4 -= 0x10000;
303 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
304 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
305 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
306 outBuf[len+2+H_1] = (unsigned char)ucs4;
307 len += 4;
308 }
309 }
310
311 *outBufLen = len;
312 return PR_TRUE;
313 } else {
314 unsigned int i, len = 0;
315 PORT_Assert((inBufLen % 2) == 0);
316 if ((inBufLen % 2) != 0) {
317 *outBufLen = 0;
318 return PR_FALSE;
319 }
320
321 for( i = 0; i < inBufLen; i += 2 ) {
322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1;
323 else if( inBuf[i+H_0] < 0x08 ) len += 2;
324 else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
325 if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
326 i += 2;
327 len += 4;
328 } else {
329 return PR_FALSE;
330 }
331 }
332 else len += 3;
333 }
334
335 if( len > maxOutBufLen ) {
336 *outBufLen = len;
337 return PR_FALSE;
338 }
339
340 len = 0;
341
342 for( i = 0; i < inBufLen; i += 2 ) {
343 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
344 /* 0000-007F -> 0xxxxxx */
345 /* 00000000 0abcdefg -> 0abcdefg */
346
347 outBuf[len] = inBuf[i+H_1] & 0x7F;
348
349 len += 1;
350 } else if( inBuf[i+H_0] < 0x08 ) {
351 /* 0080-07FF -> 110xxxxx 10xxxxxx */
352 /* 00000abc defghijk -> 110abcde 10fghijk */
353
354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2)
355 | ((inBuf[i+H_1] & 0xC0) >> 6);
356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
357
358 len += 2;
359 } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
360 int abcde, BCDE;
361
362 PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
363
364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
365 /* 110110BC DEfghijk 110111lm nopqrstu ->
366 { Let abcde = BCDE + 1 }
367 11110abc 10defghi 10jklmno 10pqrstu */
368
369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
370 abcde = BCDE + 1;
371
372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
373 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4)
374 | ((inBuf[i+0+H_1] & 0x3C) >> 2);
375 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
376 | ((inBuf[i+2+H_0] & 0x03) << 2)
377 | ((inBuf[i+2+H_1] & 0xC0) >> 6);
378 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
379
380 i += 2;
381 len += 4;
382 } else {
383 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
384 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
385
386 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
387 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2)
388 | ((inBuf[i+H_1] & 0xC0) >> 6);
389 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
390
391 len += 3;
392 }
393 }
394
395 *outBufLen = len;
396 return PR_TRUE;
397 }
398 }
399
400 PRBool
401 sec_port_iso88591_utf8_conversion_function
402 (
403 const unsigned char *inBuf,
404 unsigned int inBufLen,
405 unsigned char *outBuf,
406 unsigned int maxOutBufLen,
407 unsigned int *outBufLen
408 )
409 {
410 unsigned int i, len = 0;
411
412 PORT_Assert((unsigned int *)NULL != outBufLen);
413
414 for( i = 0; i < inBufLen; i++) {
415 if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
416 else len += 2;
417 }
418
419 if( len > maxOutBufLen ) {
420 *outBufLen = len;
421 return PR_FALSE;
422 }
423
424 len = 0;
425
426 for( i = 0; i < inBufLen; i++) {
427 if( (inBuf[i] & 0x80) == 0x00 ) {
428 /* 00-7F -> 0xxxxxxx */
429 /* 0abcdefg -> 0abcdefg */
430
431 outBuf[len] = inBuf[i];
432 len += 1;
433 } else {
434 /* 80-FF <- 110xxxxx 10xxxxxx */
435 /* 00000000 abcdefgh -> 110000ab 10cdefgh */
436
437 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
438 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
439
440 len += 2;
441 }
442 }
443
444 *outBufLen = len;
445 return PR_TRUE;
446 }
447
448 #ifdef TEST_UTF8
449
450 #include <stdio.h>
451 #include <string.h>
452 #include <stdlib.h>
453 #include <netinet/in.h> /* for htonl and htons */
454
455 /*
456 * UCS-4 vectors
457 */
458
/* One UCS-4 test vector: a code point and the UTF-8 bytes listed for it. */
459 struct ucs4 {
460 PRUint32 c; /* code point value */
461 char *utf8; /* UTF-8 byte sequence for c, written as a C string */
462 };
463
464 /*
465 * UCS-2 vectors
466 */
467
/* One UCS-2 test vector: a 16-bit code point and the UTF-8 bytes listed for it. */
468 struct ucs2 {
469 PRUint16 c; /* 16-bit code point value */
470 char *utf8; /* UTF-8 byte sequence for c, written as a C string */
471 };
472
473 /*
474 * UTF-16 vectors
475 */
476
/* One UTF-16 test vector: a code point and the two 16-bit words listed for it. */
477 struct utf16 {
478 PRUint32 c; /* code point value */
479 PRUint16 w[2]; /* the pair of 16-bit words listed for c */
480 };
481
482
483 /*
484 * UCS-4 vectors
 *
 * Each entry pairs a code point (c) with the UTF-8 byte string (utf8)
 * listed for it. Entries are grouped by encoded length: one byte
 * (0x01-0x7F), two bytes (0x80-0x7FF), three bytes (0x800-0xFFFF) and
 * four bytes (0x10000-0x10FFFF).
485 */
486
487 struct ucs4 ucs4[] = {
488 { 0x00000001, "\x01" },
489 { 0x00000002, "\x02" },
490 { 0x00000003, "\x03" },
491 { 0x00000004, "\x04" },
492 { 0x00000007, "\x07" },
493 { 0x00000008, "\x08" },
494 { 0x0000000F, "\x0F" },
495 { 0x00000010, "\x10" },
496 { 0x0000001F, "\x1F" },
497 { 0x00000020, "\x20" },
498 { 0x0000003F, "\x3F" },
499 { 0x00000040, "\x40" },
500 { 0x0000007F, "\x7F" },
501
502 { 0x00000080, "\xC2\x80" },
503 { 0x00000081, "\xC2\x81" },
504 { 0x00000082, "\xC2\x82" },
505 { 0x00000084, "\xC2\x84" },
506 { 0x00000088, "\xC2\x88" },
507 { 0x00000090, "\xC2\x90" },
508 { 0x000000A0, "\xC2\xA0" },
509 { 0x000000C0, "\xC3\x80" },
510 { 0x000000FF, "\xC3\xBF" },
511 { 0x00000100, "\xC4\x80" },
512 { 0x00000101, "\xC4\x81" },
513 { 0x00000102, "\xC4\x82" },
514 { 0x00000104, "\xC4\x84" },
515 { 0x00000108, "\xC4\x88" },
516 { 0x00000110, "\xC4\x90" },
517 { 0x00000120, "\xC4\xA0" },
518 { 0x00000140, "\xC5\x80" },
519 { 0x00000180, "\xC6\x80" },
520 { 0x000001FF, "\xC7\xBF" },
521 { 0x00000200, "\xC8\x80" },
522 { 0x00000201, "\xC8\x81" },
523 { 0x00000202, "\xC8\x82" },
524 { 0x00000204, "\xC8\x84" },
525 { 0x00000208, "\xC8\x88" },
526 { 0x00000210, "\xC8\x90" },
527 { 0x00000220, "\xC8\xA0" },
528 { 0x00000240, "\xC9\x80" },
529 { 0x00000280, "\xCA\x80" },
530 { 0x00000300, "\xCC\x80" },
531 { 0x000003FF, "\xCF\xBF" },
532 { 0x00000400, "\xD0\x80" },
533 { 0x00000401, "\xD0\x81" },
534 { 0x00000402, "\xD0\x82" },
535 { 0x00000404, "\xD0\x84" },
536 { 0x00000408, "\xD0\x88" },
537 { 0x00000410, "\xD0\x90" },
538 { 0x00000420, "\xD0\xA0" },
539 { 0x00000440, "\xD1\x80" },
540 { 0x00000480, "\xD2\x80" },
541 { 0x00000500, "\xD4\x80" },
542 { 0x00000600, "\xD8\x80" },
543 { 0x000007FF, "\xDF\xBF" },
544
545 { 0x00000800, "\xE0\xA0\x80" },
546 { 0x00000801, "\xE0\xA0\x81" },
547 { 0x00000802, "\xE0\xA0\x82" },
548 { 0x00000804, "\xE0\xA0\x84" },
549 { 0x00000808, "\xE0\xA0\x88" },
550 { 0x00000810, "\xE0\xA0\x90" },
551 { 0x00000820, "\xE0\xA0\xA0" },
552 { 0x00000840, "\xE0\xA1\x80" },
553 { 0x00000880, "\xE0\xA2\x80" },
554 { 0x00000900, "\xE0\xA4\x80" },
555 { 0x00000A00, "\xE0\xA8\x80" },
556 { 0x00000C00, "\xE0\xB0\x80" },
557 { 0x00000FFF, "\xE0\xBF\xBF" },
558 { 0x00001000, "\xE1\x80\x80" },
559 { 0x00001001, "\xE1\x80\x81" },
560 { 0x00001002, "\xE1\x80\x82" },
561 { 0x00001004, "\xE1\x80\x84" },
562 { 0x00001008, "\xE1\x80\x88" },
563 { 0x00001010, "\xE1\x80\x90" },
564 { 0x00001020, "\xE1\x80\xA0" },
565 { 0x00001040, "\xE1\x81\x80" },
566 { 0x00001080, "\xE1\x82\x80" },
567 { 0x00001100, "\xE1\x84\x80" },
568 { 0x00001200, "\xE1\x88\x80" },
569 { 0x00001400, "\xE1\x90\x80" },
570 { 0x00001800, "\xE1\xA0\x80" },
571 { 0x00001FFF, "\xE1\xBF\xBF" },
572 { 0x00002000, "\xE2\x80\x80" },
573 { 0x00002001, "\xE2\x80\x81" },
574 { 0x00002002, "\xE2\x80\x82" },
575 { 0x00002004, "\xE2\x80\x84" },
576 { 0x00002008, "\xE2\x80\x88" },
577 { 0x00002010, "\xE2\x80\x90" },
578 { 0x00002020, "\xE2\x80\xA0" },
579 { 0x00002040, "\xE2\x81\x80" },
580 { 0x00002080, "\xE2\x82\x80" },
581 { 0x00002100, "\xE2\x84\x80" },
582 { 0x00002200, "\xE2\x88\x80" },
583 { 0x00002400, "\xE2\x90\x80" },
584 { 0x00002800, "\xE2\xA0\x80" },
585 { 0x00003000, "\xE3\x80\x80" },
586 { 0x00003FFF, "\xE3\xBF\xBF" },
587 { 0x00004000, "\xE4\x80\x80" },
588 { 0x00004001, "\xE4\x80\x81" },
589 { 0x00004002, "\xE4\x80\x82" },
590 { 0x00004004, "\xE4\x80\x84" },
591 { 0x00004008, "\xE4\x80\x88" },
592 { 0x00004010, "\xE4\x80\x90" },
593 { 0x00004020, "\xE4\x80\xA0" },
594 { 0x00004040, "\xE4\x81\x80" },
595 { 0x00004080, "\xE4\x82\x80" },
596 { 0x00004100, "\xE4\x84\x80" },
597 { 0x00004200, "\xE4\x88\x80" },
598 { 0x00004400, "\xE4\x90\x80" },
599 { 0x00004800, "\xE4\xA0\x80" },
600 { 0x00005000, "\xE5\x80\x80" },
601 { 0x00006000, "\xE6\x80\x80" },
602 { 0x00007FFF, "\xE7\xBF\xBF" },
603 { 0x00008000, "\xE8\x80\x80" },
604 { 0x00008001, "\xE8\x80\x81" },
605 { 0x00008002, "\xE8\x80\x82" },
606 { 0x00008004, "\xE8\x80\x84" },
607 { 0x00008008, "\xE8\x80\x88" },
608 { 0x00008010, "\xE8\x80\x90" },
609 { 0x00008020, "\xE8\x80\xA0" },
610 { 0x00008040, "\xE8\x81\x80" },
611 { 0x00008080, "\xE8\x82\x80" },
612 { 0x00008100, "\xE8\x84\x80" },
613 { 0x00008200, "\xE8\x88\x80" },
614 { 0x00008400, "\xE8\x90\x80" },
615 { 0x00008800, "\xE8\xA0\x80" },
616 { 0x00009000, "\xE9\x80\x80" },
617 { 0x0000A000, "\xEA\x80\x80" },
618 { 0x0000C000, "\xEC\x80\x80" },
619 { 0x0000FFFF, "\xEF\xBF\xBF" },
620
621 { 0x00010000, "\xF0\x90\x80\x80" },
622 { 0x00010001, "\xF0\x90\x80\x81" },
623 { 0x00010002, "\xF0\x90\x80\x82" },
624 { 0x00010004, "\xF0\x90\x80\x84" },
625 { 0x00010008, "\xF0\x90\x80\x88" },
626 { 0x00010010, "\xF0\x90\x80\x90" },
627 { 0x00010020, "\xF0\x90\x80\xA0" },
628 { 0x00010040, "\xF0\x90\x81\x80" },
629 { 0x00010080, "\xF0\x90\x82\x80" },
630 { 0x00010100, "\xF0\x90\x84\x80" },
631 { 0x00010200, "\xF0\x90\x88\x80" },
632 { 0x00010400, "\xF0\x90\x90\x80" },
633 { 0x00010800, "\xF0\x90\xA0\x80" },
634 { 0x00011000, "\xF0\x91\x80\x80" },
635 { 0x00012000, "\xF0\x92\x80\x80" },
636 { 0x00014000, "\xF0\x94\x80\x80" },
637 { 0x00018000, "\xF0\x98\x80\x80" },
638 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
639 { 0x00020000, "\xF0\xA0\x80\x80" },
640 { 0x00020001, "\xF0\xA0\x80\x81" },
641 { 0x00020002, "\xF0\xA0\x80\x82" },
642 { 0x00020004, "\xF0\xA0\x80\x84" },
643 { 0x00020008, "\xF0\xA0\x80\x88" },
644 { 0x00020010, "\xF0\xA0\x80\x90" },
645 { 0x00020020, "\xF0\xA0\x80\xA0" },
646 { 0x00020040, "\xF0\xA0\x81\x80" },
647 { 0x00020080, "\xF0\xA0\x82\x80" },
648 { 0x00020100, "\xF0\xA0\x84\x80" },
649 { 0x00020200, "\xF0\xA0\x88\x80" },
650 { 0x00020400, "\xF0\xA0\x90\x80" },
651 { 0x00020800, "\xF0\xA0\xA0\x80" },
652 { 0x00021000, "\xF0\xA1\x80\x80" },
653 { 0x00022000, "\xF0\xA2\x80\x80" },
654 { 0x00024000, "\xF0\xA4\x80\x80" },
655 { 0x00028000, "\xF0\xA8\x80\x80" },
656 { 0x00030000, "\xF0\xB0\x80\x80" },
657 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
658 { 0x00040000, "\xF1\x80\x80\x80" },
659 { 0x00040001, "\xF1\x80\x80\x81" },
660 { 0x00040002, "\xF1\x80\x80\x82" },
661 { 0x00040004, "\xF1\x80\x80\x84" },
662 { 0x00040008, "\xF1\x80\x80\x88" },
663 { 0x00040010, "\xF1\x80\x80\x90" },
664 { 0x00040020, "\xF1\x80\x80\xA0" },
665 { 0x00040040, "\xF1\x80\x81\x80" },
666 { 0x00040080, "\xF1\x80\x82\x80" },
667 { 0x00040100, "\xF1\x80\x84\x80" },
668 { 0x00040200, "\xF1\x80\x88\x80" },
669 { 0x00040400, "\xF1\x80\x90\x80" },
670 { 0x00040800, "\xF1\x80\xA0\x80" },
671 { 0x00041000, "\xF1\x81\x80\x80" },
672 { 0x00042000, "\xF1\x82\x80\x80" },
673 { 0x00044000, "\xF1\x84\x80\x80" },
674 { 0x00048000, "\xF1\x88\x80\x80" },
675 { 0x00050000, "\xF1\x90\x80\x80" },
676 { 0x00060000, "\xF1\xA0\x80\x80" },
677 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
678 { 0x00080000, "\xF2\x80\x80\x80" },
679 { 0x00080001, "\xF2\x80\x80\x81" },
680 { 0x00080002, "\xF2\x80\x80\x82" },
681 { 0x00080004, "\xF2\x80\x80\x84" },
682 { 0x00080008, "\xF2\x80\x80\x88" },
683 { 0x00080010, "\xF2\x80\x80\x90" },
684 { 0x00080020, "\xF2\x80\x80\xA0" },
685 { 0x00080040, "\xF2\x80\x81\x80" },
686 { 0x00080080, "\xF2\x80\x82\x80" },
687 { 0x00080100, "\xF2\x80\x84\x80" },
688 { 0x00080200, "\xF2\x80\x88\x80" },
689 { 0x00080400, "\xF2\x80\x90\x80" },
690 { 0x00080800, "\xF2\x80\xA0\x80" },
691 { 0x00081000, "\xF2\x81\x80\x80" },
692 { 0x00082000, "\xF2\x82\x80\x80" },
693 { 0x00084000, "\xF2\x84\x80\x80" },
694 { 0x00088000, "\xF2\x88\x80\x80" },
695 { 0x00090000, "\xF2\x90\x80\x80" },
696 { 0x000A0000, "\xF2\xA0\x80\x80" },
697 { 0x000C0000, "\xF3\x80\x80\x80" },
698 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
699 { 0x00100000, "\xF4\x80\x80\x80" },
700 { 0x00100001, "\xF4\x80\x80\x81" },
701 { 0x00100002, "\xF4\x80\x80\x82" },
702 { 0x00100004, "\xF4\x80\x80\x84" },
703 { 0x00100008, "\xF4\x80\x80\x88" },
704 { 0x00100010, "\xF4\x80\x80\x90" },
705 { 0x00100020, "\xF4\x80\x80\xA0" },
706 { 0x00100040, "\xF4\x80\x81\x80" },
707 { 0x00100080, "\xF4\x80\x82\x80" },
708 { 0x00100100, "\xF4\x80\x84\x80" },
709 { 0x00100200, "\xF4\x80\x88\x80" },
710 { 0x00100400, "\xF4\x80\x90\x80" },
711 { 0x00100800, "\xF4\x80\xA0\x80" },
712 { 0x00101000, "\xF4\x81\x80\x80" },
713 { 0x00102000, "\xF4\x82\x80\x80" },
714 { 0x00104000, "\xF4\x84\x80\x80" },
715 { 0x00108000, "\xF4\x88\x80\x80" },
716 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
717 };
718
719 /*
720 * UCS-2 vectors
 *
 * Each entry pairs a 16-bit code point (c) with the UTF-8 byte string
 * (utf8) listed for it. Entries are grouped by encoded length: one byte
 * (0x01-0x7F), two bytes (0x80-0x7FF) and three bytes (0x800-0xFFFF).
721 */
722
723 struct ucs2 ucs2[] = {
724 { 0x0001, "\x01" },
725 { 0x0002, "\x02" },
726 { 0x0003, "\x03" },
727 { 0x0004, "\x04" },
728 { 0x0007, "\x07" },
729 { 0x0008, "\x08" },
730 { 0x000F, "\x0F" },
731 { 0x0010, "\x10" },
732 { 0x001F, "\x1F" },
733 { 0x0020, "\x20" },
734 { 0x003F, "\x3F" },
735 { 0x0040, "\x40" },
736 { 0x007F, "\x7F" },
737
738 { 0x0080, "\xC2\x80" },
739 { 0x0081, "\xC2\x81" },
740 { 0x0082, "\xC2\x82" },
741 { 0x0084, "\xC2\x84" },
742 { 0x0088, "\xC2\x88" },
743 { 0x0090, "\xC2\x90" },
744 { 0x00A0, "\xC2\xA0" },
745 { 0x00C0, "\xC3\x80" },
746 { 0x00FF, "\xC3\xBF" },
747 { 0x0100, "\xC4\x80" },
748 { 0x0101, "\xC4\x81" },
749 { 0x0102, "\xC4\x82" },
750 { 0x0104, "\xC4\x84" },
751 { 0x0108, "\xC4\x88" },
752 { 0x0110, "\xC4\x90" },
753 { 0x0120, "\xC4\xA0" },
754 { 0x0140, "\xC5\x80" },
755 { 0x0180, "\xC6\x80" },
756 { 0x01FF, "\xC7\xBF" },
757 { 0x0200, "\xC8\x80" },
758 { 0x0201, "\xC8\x81" },
759 { 0x0202, "\xC8\x82" },
760 { 0x0204, "\xC8\x84" },
761 { 0x0208, "\xC8\x88" },
762 { 0x0210, "\xC8\x90" },
763 { 0x0220, "\xC8\xA0" },
764 { 0x0240, "\xC9\x80" },
765 { 0x0280, "\xCA\x80" },
766 { 0x0300, "\xCC\x80" },
767 { 0x03FF, "\xCF\xBF" },
768 { 0x0400, "\xD0\x80" },
769 { 0x0401, "\xD0\x81" },
770 { 0x0402, "\xD0\x82" },
771 { 0x0404, "\xD0\x84" },
772 { 0x0408, "\xD0\x88" },
773 { 0x0410, "\xD0\x90" },
774 { 0x0420, "\xD0\xA0" },
775 { 0x0440, "\xD1\x80" },
776 { 0x0480, "\xD2\x80" },
777 { 0x0500, "\xD4\x80" },
778 { 0x0600, "\xD8\x80" },
779 { 0x07FF, "\xDF\xBF" },
780
781 { 0x0800, "\xE0\xA0\x80" },
782 { 0x0801, "\xE0\xA0\x81" },
783 { 0x0802, "\xE0\xA0\x82" },
784 { 0x0804, "\xE0\xA0\x84" },
785 { 0x0808, "\xE0\xA0\x88" },
786 { 0x0810, "\xE0\xA0\x90" },
787 { 0x0820, "\xE0\xA0\xA0" },
788 { 0x0840, "\xE0\xA1\x80" },
789 { 0x0880, "\xE0\xA2\x80" },
790 { 0x0900, "\xE0\xA4\x80" },
791 { 0x0A00, "\xE0\xA8\x80" },
792 { 0x0C00, "\xE0\xB0\x80" },
793 { 0x0FFF, "\xE0\xBF\xBF" },
794 { 0x1000, "\xE1\x80\x80" },
795 { 0x1001, "\xE1\x80\x81" },
796 { 0x1002, "\xE1\x80\x82" },
797 { 0x1004, "\xE1\x80\x84" },
798 { 0x1008, "\xE1\x80\x88" },
799 { 0x1010, "\xE1\x80\x90" },
800 { 0x1020, "\xE1\x80\xA0" },
801 { 0x1040, "\xE1\x81\x80" },
802 { 0x1080, "\xE1\x82\x80" },
803 { 0x1100, "\xE1\x84\x80" },
804 { 0x1200, "\xE1\x88\x80" },
805 { 0x1400, "\xE1\x90\x80" },
806 { 0x1800, "\xE1\xA0\x80" },
807 { 0x1FFF, "\xE1\xBF\xBF" },
808 { 0x2000, "\xE2\x80\x80" },
809 { 0x2001, "\xE2\x80\x81" },
810 { 0x2002, "\xE2\x80\x82" },
811 { 0x2004, "\xE2\x80\x84" },
812 { 0x2008, "\xE2\x80\x88" },
813 { 0x2010, "\xE2\x80\x90" },
814 { 0x2020, "\xE2\x80\xA0" },
815 { 0x2040, "\xE2\x81\x80" },
816 { 0x2080, "\xE2\x82\x80" },
817 { 0x2100, "\xE2\x84\x80" },
818 { 0x2200, "\xE2\x88\x80" },
819 { 0x2400, "\xE2\x90\x80" },
820 { 0x2800, "\xE2\xA0\x80" },
821 { 0x3000, "\xE3\x80\x80" },
822 { 0x3FFF, "\xE3\xBF\xBF" },
823 { 0x4000, "\xE4\x80\x80" },
824 { 0x4001, "\xE4\x80\x81" },
825 { 0x4002, "\xE4\x80\x82" },
826 { 0x4004, "\xE4\x80\x84" },
827 { 0x4008, "\xE4\x80\x88" },
828 { 0x4010, "\xE4\x80\x90" },
829 { 0x4020, "\xE4\x80\xA0" },
830 { 0x4040, "\xE4\x81\x80" },
831 { 0x4080, "\xE4\x82\x80" },
832 { 0x4100, "\xE4\x84\x80" },
833 { 0x4200, "\xE4\x88\x80" },
834 { 0x4400, "\xE4\x90\x80" },
835 { 0x4800, "\xE4\xA0\x80" },
836 { 0x5000, "\xE5\x80\x80" },
837 { 0x6000, "\xE6\x80\x80" },
838 { 0x7FFF, "\xE7\xBF\xBF" },
839 { 0x8000, "\xE8\x80\x80" },
840 { 0x8001, "\xE8\x80\x81" },
841 { 0x8002, "\xE8\x80\x82" },
842 { 0x8004, "\xE8\x80\x84" },
843 { 0x8008, "\xE8\x80\x88" },
844 { 0x8010, "\xE8\x80\x90" },
845 { 0x8020, "\xE8\x80\xA0" },
846 { 0x8040, "\xE8\x81\x80" },
847 { 0x8080, "\xE8\x82\x80" },
848 { 0x8100, "\xE8\x84\x80" },
849 { 0x8200, "\xE8\x88\x80" },
850 { 0x8400, "\xE8\x90\x80" },
851 { 0x8800, "\xE8\xA0\x80" },
852 { 0x9000, "\xE9\x80\x80" },
853 { 0xA000, "\xEA\x80\x80" },
854 { 0xC000, "\xEC\x80\x80" },
855 { 0xFFFF, "\xEF\xBF\xBF" }
856
857 };
858
859 /*
860 * UTF-16 vectors
861 */
862
863 struct utf16 utf16[] = {
864 { 0x00010000, { 0xD800, 0xDC00 } },
865 { 0x00010001, { 0xD800, 0xDC01 } },
866 { 0x00010002, { 0xD800, 0xDC02 } },
867 { 0x00010003, { 0xD800, 0xDC03 } },
868 { 0x00010004, { 0xD800, 0xDC04 } },
869 { 0x00010007, { 0xD800, 0xDC07 } },
870 { 0x00010008, { 0xD800, 0xDC08 } },
871 { 0x0001000F, { 0xD800, 0xDC0F } },
872 { 0x00010010, { 0xD800, 0xDC10 } },
873 { 0x0001001F, { 0xD800, 0xDC1F } },
874 { 0x00010020, { 0xD800, 0xDC20 } },
875 { 0x0001003F, { 0xD800, 0xDC3F } },
876 { 0x00010040, { 0xD800, 0xDC40 } },
877 { 0x0001007F, { 0xD800, 0xDC7F } },
878 { 0x00010080, { 0xD800, 0xDC80 } },
879 { 0x00010081, { 0xD800, 0xDC81 } },
880 { 0x00010082, { 0xD800, 0xDC82 } },
881 { 0x00010084, { 0xD800, 0xDC84 } },
882 { 0x00010088, { 0xD800, 0xDC88 } },
883 { 0x00010090, { 0xD800, 0xDC90 } },
884 { 0x000100A0, { 0xD800, 0xDCA0 } },
885 { 0x000100C0, { 0xD800, 0xDCC0 } },
886 { 0x000100FF, { 0xD800, 0xDCFF } },
887 { 0x00010100, { 0xD800, 0xDD00 } },
888 { 0x00010101, { 0xD800, 0xDD01 } },
889 { 0x00010102, { 0xD800, 0xDD02 } },
890 { 0x00010104, { 0xD800, 0xDD04 } },
891 { 0x00010108, { 0xD800, 0xDD08 } },
892 { 0x00010110, { 0xD800, 0xDD10 } },
893 { 0x00010120, { 0xD800, 0xDD20 } },
894 { 0x00010140, { 0xD800, 0xDD40 } },
895 { 0x00010180, { 0xD800, 0xDD80 } },
896 { 0x000101FF, { 0xD800, 0xDDFF } },
897 { 0x00010200, { 0xD800, 0xDE00 } },
898 { 0x00010201, { 0xD800, 0xDE01 } },
899 { 0x00010202, { 0xD800, 0xDE02 } },
900 { 0x00010204, { 0xD800, 0xDE04 } },
901 { 0x00010208, { 0xD800, 0xDE08 } },
902 { 0x00010210, { 0xD800, 0xDE10 } },
903 { 0x00010220, { 0xD800, 0xDE20 } },
904 { 0x00010240, { 0xD800, 0xDE40 } },
905 { 0x00010280, { 0xD800, 0xDE80 } },
906 { 0x00010300, { 0xD800, 0xDF00 } },
907 { 0x000103FF, { 0xD800, 0xDFFF } },
908 { 0x00010400, { 0xD801, 0xDC00 } },
909 { 0x00010401, { 0xD801, 0xDC01 } },
910 { 0x00010402, { 0xD801, 0xDC02 } },
911 { 0x00010404, { 0xD801, 0xDC04 } },
912 { 0x00010408, { 0xD801, 0xDC08 } },
913 { 0x00010410, { 0xD801, 0xDC10 } },
914 { 0x00010420, { 0xD801, 0xDC20 } },
915 { 0x00010440, { 0xD801, 0xDC40 } },
916 { 0x00010480, { 0xD801, 0xDC80 } },
917 { 0x00010500, { 0xD801, 0xDD00 } },
918 { 0x00010600, { 0xD801, 0xDE00 } },
919 { 0x000107FF, { 0xD801, 0xDFFF } },
920 { 0x00010800, { 0xD802, 0xDC00 } },
921 { 0x00010801, { 0xD802, 0xDC01 } },
922 { 0x00010802, { 0xD802, 0xDC02 } },
923 { 0x00010804, { 0xD802, 0xDC04 } },
924 { 0x00010808, { 0xD802, 0xDC08 } },
925 { 0x00010810, { 0xD802, 0xDC10 } },
926 { 0x00010820, { 0xD802, 0xDC20 } },
927 { 0x00010840, { 0xD802, 0xDC40 } },
928 { 0x00010880, { 0xD802, 0xDC80 } },
929 { 0x00010900, { 0xD802, 0xDD00 } },
930 { 0x00010A00, { 0xD802, 0xDE00 } },
931 { 0x00010C00, { 0xD803, 0xDC00 } },
932 { 0x00010FFF, { 0xD803, 0xDFFF } },
933 { 0x00011000, { 0xD804, 0xDC00 } },
934 { 0x00011001, { 0xD804, 0xDC01 } },
935 { 0x00011002, { 0xD804, 0xDC02 } },
936 { 0x00011004, { 0xD804, 0xDC04 } },
937 { 0x00011008, { 0xD804, 0xDC08 } },
938 { 0x00011010, { 0xD804, 0xDC10 } },
939 { 0x00011020, { 0xD804, 0xDC20 } },
940 { 0x00011040, { 0xD804, 0xDC40 } },
941 { 0x00011080, { 0xD804, 0xDC80 } },
942 { 0x00011100, { 0xD804, 0xDD00 } },
943 { 0x00011200, { 0xD804, 0xDE00 } },
944 { 0x00011400, { 0xD805, 0xDC00 } },
945 { 0x00011800, { 0xD806, 0xDC00 } },
946 { 0x00011FFF, { 0xD807, 0xDFFF } },
947 { 0x00012000, { 0xD808, 0xDC00 } },
948 { 0x00012001, { 0xD808, 0xDC01 } },
949 { 0x00012002, { 0xD808, 0xDC02 } },
950 { 0x00012004, { 0xD808, 0xDC04 } },
951 { 0x00012008, { 0xD808, 0xDC08 } },
952 { 0x00012010, { 0xD808, 0xDC10 } },
953 { 0x00012020, { 0xD808, 0xDC20 } },
954 { 0x00012040, { 0xD808, 0xDC40 } },
955 { 0x00012080, { 0xD808, 0xDC80 } },
956 { 0x00012100, { 0xD808, 0xDD00 } },
957 { 0x00012200, { 0xD808, 0xDE00 } },
958 { 0x00012400, { 0xD809, 0xDC00 } },
959 { 0x00012800, { 0xD80A, 0xDC00 } },
960 { 0x00013000, { 0xD80C, 0xDC00 } },
961 { 0x00013FFF, { 0xD80F, 0xDFFF } },
962 { 0x00014000, { 0xD810, 0xDC00 } },
963 { 0x00014001, { 0xD810, 0xDC01 } },
964 { 0x00014002, { 0xD810, 0xDC02 } },
965 { 0x00014004, { 0xD810, 0xDC04 } },
966 { 0x00014008, { 0xD810, 0xDC08 } },
967 { 0x00014010, { 0xD810, 0xDC10 } },
968 { 0x00014020, { 0xD810, 0xDC20 } },
969 { 0x00014040, { 0xD810, 0xDC40 } },
970 { 0x00014080, { 0xD810, 0xDC80 } },
971 { 0x00014100, { 0xD810, 0xDD00 } },
972 { 0x00014200, { 0xD810, 0xDE00 } },
973 { 0x00014400, { 0xD811, 0xDC00 } },
974 { 0x00014800, { 0xD812, 0xDC00 } },
975 { 0x00015000, { 0xD814, 0xDC00 } },
976 { 0x00016000, { 0xD818, 0xDC00 } },
977 { 0x00017FFF, { 0xD81F, 0xDFFF } },
978 { 0x00018000, { 0xD820, 0xDC00 } },
979 { 0x00018001, { 0xD820, 0xDC01 } },
980 { 0x00018002, { 0xD820, 0xDC02 } },
981 { 0x00018004, { 0xD820, 0xDC04 } },
982 { 0x00018008, { 0xD820, 0xDC08 } },
983 { 0x00018010, { 0xD820, 0xDC10 } },
984 { 0x00018020, { 0xD820, 0xDC20 } },
985 { 0x00018040, { 0xD820, 0xDC40 } },
986 { 0x00018080, { 0xD820, 0xDC80 } },
987 { 0x00018100, { 0xD820, 0xDD00 } },
988 { 0x00018200, { 0xD820, 0xDE00 } },
989 { 0x00018400, { 0xD821, 0xDC00 } },
990 { 0x00018800, { 0xD822, 0xDC00 } },
991 { 0x00019000, { 0xD824, 0xDC00 } },
992 { 0x0001A000, { 0xD828, 0xDC00 } },
993 { 0x0001C000, { 0xD830, 0xDC00 } },
994 { 0x0001FFFF, { 0xD83F, 0xDFFF } },
995 { 0x00020000, { 0xD840, 0xDC00 } },
996 { 0x00020001, { 0xD840, 0xDC01 } },
997 { 0x00020002, { 0xD840, 0xDC02 } },
998 { 0x00020004, { 0xD840, 0xDC04 } },
999 { 0x00020008, { 0xD840, 0xDC08 } },
1000 { 0x00020010, { 0xD840, 0xDC10 } },
1001 { 0x00020020, { 0xD840, 0xDC20 } },
1002 { 0x00020040, { 0xD840, 0xDC40 } },
1003 { 0x00020080, { 0xD840, 0xDC80 } },
1004 { 0x00020100, { 0xD840, 0xDD00 } },
1005 { 0x00020200, { 0xD840, 0xDE00 } },
1006 { 0x00020400, { 0xD841, 0xDC00 } },
1007 { 0x00020800, { 0xD842, 0xDC00 } },
1008 { 0x00021000, { 0xD844, 0xDC00 } },
1009 { 0x00022000, { 0xD848, 0xDC00 } },
1010 { 0x00024000, { 0xD850, 0xDC00 } },
1011 { 0x00028000, { 0xD860, 0xDC00 } },
1012 { 0x0002FFFF, { 0xD87F, 0xDFFF } },
1013 { 0x00030000, { 0xD880, 0xDC00 } },
1014 { 0x00030001, { 0xD880, 0xDC01 } },
1015 { 0x00030002, { 0xD880, 0xDC02 } },
1016 { 0x00030004, { 0xD880, 0xDC04 } },
1017 { 0x00030008, { 0xD880, 0xDC08 } },
1018 { 0x00030010, { 0xD880, 0xDC10 } },
1019 { 0x00030020, { 0xD880, 0xDC20 } },
1020 { 0x00030040, { 0xD880, 0xDC40 } },
1021 { 0x00030080, { 0xD880, 0xDC80 } },
1022 { 0x00030100, { 0xD880, 0xDD00 } },
1023 { 0x00030200, { 0xD880, 0xDE00 } },
1024 { 0x00030400, { 0xD881, 0xDC00 } },
1025 { 0x00030800, { 0xD882, 0xDC00 } },
1026 { 0x00031000, { 0xD884, 0xDC00 } },
1027 { 0x00032000, { 0xD888, 0xDC00 } },
1028 { 0x00034000, { 0xD890, 0xDC00 } },
1029 { 0x00038000, { 0xD8A0, 0xDC00 } },
1030 { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
1031 { 0x00040000, { 0xD8C0, 0xDC00 } },
1032 { 0x00040001, { 0xD8C0, 0xDC01 } },
1033 { 0x00040002, { 0xD8C0, 0xDC02 } },
1034 { 0x00040004, { 0xD8C0, 0xDC04 } },
1035 { 0x00040008, { 0xD8C0, 0xDC08 } },
1036 { 0x00040010, { 0xD8C0, 0xDC10 } },
1037 { 0x00040020, { 0xD8C0, 0xDC20 } },
1038 { 0x00040040, { 0xD8C0, 0xDC40 } },
1039 { 0x00040080, { 0xD8C0, 0xDC80 } },
1040 { 0x00040100, { 0xD8C0, 0xDD00 } },
1041 { 0x00040200, { 0xD8C0, 0xDE00 } },
1042 { 0x00040400, { 0xD8C1, 0xDC00 } },
1043 { 0x00040800, { 0xD8C2, 0xDC00 } },
1044 { 0x00041000, { 0xD8C4, 0xDC00 } },
1045 { 0x00042000, { 0xD8C8, 0xDC00 } },
1046 { 0x00044000, { 0xD8D0, 0xDC00 } },
1047 { 0x00048000, { 0xD8E0, 0xDC00 } },
1048 { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
1049 { 0x00050000, { 0xD900, 0xDC00 } },
1050 { 0x00050001, { 0xD900, 0xDC01 } },
1051 { 0x00050002, { 0xD900, 0xDC02 } },
1052 { 0x00050004, { 0xD900, 0xDC04 } },
1053 { 0x00050008, { 0xD900, 0xDC08 } },
1054 { 0x00050010, { 0xD900, 0xDC10 } },
1055 { 0x00050020, { 0xD900, 0xDC20 } },
1056 { 0x00050040, { 0xD900, 0xDC40 } },
1057 { 0x00050080, { 0xD900, 0xDC80 } },
1058 { 0x00050100, { 0xD900, 0xDD00 } },
1059 { 0x00050200, { 0xD900, 0xDE00 } },
1060 { 0x00050400, { 0xD901, 0xDC00 } },
1061 { 0x00050800, { 0xD902, 0xDC00 } },
1062 { 0x00051000, { 0xD904, 0xDC00 } },
1063 { 0x00052000, { 0xD908, 0xDC00 } },
1064 { 0x00054000, { 0xD910, 0xDC00 } },
1065 { 0x00058000, { 0xD920, 0xDC00 } },
1066 { 0x00060000, { 0xD940, 0xDC00 } },
1067 { 0x00070000, { 0xD980, 0xDC00 } },
1068 { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
1069 { 0x00080000, { 0xD9C0, 0xDC00 } },
1070 { 0x00080001, { 0xD9C0, 0xDC01 } },
1071 { 0x00080002, { 0xD9C0, 0xDC02 } },
1072 { 0x00080004, { 0xD9C0, 0xDC04 } },
1073 { 0x00080008, { 0xD9C0, 0xDC08 } },
1074 { 0x00080010, { 0xD9C0, 0xDC10 } },
1075 { 0x00080020, { 0xD9C0, 0xDC20 } },
1076 { 0x00080040, { 0xD9C0, 0xDC40 } },
1077 { 0x00080080, { 0xD9C0, 0xDC80 } },
1078 { 0x00080100, { 0xD9C0, 0xDD00 } },
1079 { 0x00080200, { 0xD9C0, 0xDE00 } },
1080 { 0x00080400, { 0xD9C1, 0xDC00 } },
1081 { 0x00080800, { 0xD9C2, 0xDC00 } },
1082 { 0x00081000, { 0xD9C4, 0xDC00 } },
1083 { 0x00082000, { 0xD9C8, 0xDC00 } },
1084 { 0x00084000, { 0xD9D0, 0xDC00 } },
1085 { 0x00088000, { 0xD9E0, 0xDC00 } },
1086 { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
1087 { 0x00090000, { 0xDA00, 0xDC00 } },
1088 { 0x00090001, { 0xDA00, 0xDC01 } },
1089 { 0x00090002, { 0xDA00, 0xDC02 } },
1090 { 0x00090004, { 0xDA00, 0xDC04 } },
1091 { 0x00090008, { 0xDA00, 0xDC08 } },
1092 { 0x00090010, { 0xDA00, 0xDC10 } },
1093 { 0x00090020, { 0xDA00, 0xDC20 } },
1094 { 0x00090040, { 0xDA00, 0xDC40 } },
1095 { 0x00090080, { 0xDA00, 0xDC80 } },
1096 { 0x00090100, { 0xDA00, 0xDD00 } },
1097 { 0x00090200, { 0xDA00, 0xDE00 } },
1098 { 0x00090400, { 0xDA01, 0xDC00 } },
1099 { 0x00090800, { 0xDA02, 0xDC00 } },
1100 { 0x00091000, { 0xDA04, 0xDC00 } },
1101 { 0x00092000, { 0xDA08, 0xDC00 } },
1102 { 0x00094000, { 0xDA10, 0xDC00 } },
1103 { 0x00098000, { 0xDA20, 0xDC00 } },
1104 { 0x000A0000, { 0xDA40, 0xDC00 } },
1105 { 0x000B0000, { 0xDA80, 0xDC00 } },
1106 { 0x000C0000, { 0xDAC0, 0xDC00 } },
1107 { 0x000D0000, { 0xDB00, 0xDC00 } },
1108 { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
1109 { 0x0010FFFF, { 0xDBFF, 0xDFFF } }
1110
1111 };
1112
/*
 * Byte strings that the UTF-8 decoders must refuse.
 * test_utf8_bad_chars() feeds each entry (as a NUL-terminated string)
 * to both the UCS-2 and the UCS-4 converter and expects a failure.
 */
char *utf8_bad[] = {
  /* multi-byte forms of values that fit a shorter form */
  "\xC0\x80",
  "\xC1\xBF",
  "\xE0\x80\x80",
  "\xE0\x9F\xBF",
  "\xF0\x80\x80\x80",
  "\xF0\x8F\xBF\xBF",

  /* four-byte forms of values above 0x0010FFFF */
  "\xF4\x90\x80\x80",
  "\xF7\xBF\xBF\xBF",

  /* five-byte forms (lead bytes F8..FB) */
  "\xF8\x80\x80\x80\x80",
  "\xF8\x88\x80\x80\x80",
  "\xF8\x92\x80\x80\x80",
  "\xF8\x9F\xBF\xBF\xBF",
  "\xF8\xA0\x80\x80\x80",
  "\xF8\xA8\x80\x80\x80",
  "\xF8\xB0\x80\x80\x80",
  "\xF8\xBF\xBF\xBF\xBF",
  "\xF9\x80\x80\x80\x88",
  "\xF9\x84\x80\x80\x80",
  "\xF9\xBF\xBF\xBF\xBF",
  "\xFA\x80\x80\x80\x80",
  "\xFA\x90\x80\x80\x80",
  "\xFB\xBF\xBF\xBF\xBF",

  /* six-byte forms (lead bytes FC..FD) */
  "\xFC\x84\x80\x80\x80\x81",
  "\xFC\x85\x80\x80\x80\x80",
  "\xFC\x86\x80\x80\x80\x80",
  "\xFC\x87\xBF\xBF\xBF\xBF",
  "\xFC\x88\xA0\x80\x80\x80",
  "\xFC\x89\x80\x80\x80\x80",
  "\xFC\x8A\x80\x80\x80\x80",
  "\xFC\x90\x80\x80\x80\x82",
  "\xFD\x80\x80\x80\x80\x80",
  "\xFD\xBF\xBF\xBF\xBF\xBF",

  /* continuation byte with no lead byte */
  "\x80",
  /* lead byte with no continuation byte */
  "\xC3",
  /* lead byte followed by another lead byte */
  "\xC3\xC3\x80",

  /* three-byte forms of values in the surrogate range D800..DFFF */
  "\xED\xA0\x80",
  "\xED\xBF\x80",
  "\xED\xBF\xBF",
  /* surrogate value followed by a well-formed three-byte sequence */
  "\xED\xA0\x80\xE0\xBF\xBF",
};
1155
/*
 * Write to stdout: `word` and a space, then each byte of the
 * NUL-terminated buffer `utf8` as two hex digits plus a space,
 * then `end`.
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
  unsigned char *cursor = utf8;

  fprintf(stdout, "%s ", word);

  while( 0 != *cursor ) {
    fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
    cursor++;
  }

  fprintf(stdout, "%s", end);
}
1170
1171 static PRBool
1172 test_ucs4_chars
1173 (
1174 void
1175 )
1176 {
1177 PRBool rv = PR_TRUE;
1178 int i;
1179
1180 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1181 struct ucs4 *e = &ucs4[i];
1182 PRBool result;
1183 unsigned char utf8[8];
1184 unsigned int len = 0;
1185 PRUint32 back = 0;
1186
1187 (void)memset(utf8, 0, sizeof(utf8));
1188
1189 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1190 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1191
1192 if( !result ) {
1193 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
1194 rv = PR_FALSE;
1195 continue;
1196 }
1197
1198 if( (len >= sizeof(utf8)) ||
1199 (strlen(e->utf8) != len) ||
1200 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1201 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
1202 dump_utf8("expected", e->utf8, ", ");
1203 dump_utf8("received", utf8, "\n");
1204 rv = PR_FALSE;
1205 continue;
1206 }
1207
1208 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1209 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1210
1211 if( !result ) {
1212 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
1213 rv = PR_FALSE;
1214 continue;
1215 }
1216
1217 if( (sizeof(back) != len) || (e->c != back) ) {
1218 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:");
1219 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1220 rv = PR_FALSE;
1221 continue;
1222 }
1223 }
1224
1225 return rv;
1226 }
1227
1228 static PRBool
1229 test_ucs2_chars
1230 (
1231 void
1232 )
1233 {
1234 PRBool rv = PR_TRUE;
1235 int i;
1236
1237 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1238 struct ucs2 *e = &ucs2[i];
1239 PRBool result;
1240 unsigned char utf8[8];
1241 unsigned int len = 0;
1242 PRUint16 back = 0;
1243
1244 (void)memset(utf8, 0, sizeof(utf8));
1245
1246 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1247 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1248
1249 if( !result ) {
1250 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
1251 rv = PR_FALSE;
1252 continue;
1253 }
1254
1255 if( (len >= sizeof(utf8)) ||
1256 (strlen(e->utf8) != len) ||
1257 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1258 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
1259 dump_utf8("expected", e->utf8, ", ");
1260 dump_utf8("received", utf8, "\n");
1261 rv = PR_FALSE;
1262 continue;
1263 }
1264
1265 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1266 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1267
1268 if( !result ) {
1269 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
1270 rv = PR_FALSE;
1271 continue;
1272 }
1273
1274 if( (sizeof(back) != len) || (e->c != back) ) {
1275 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:");
1276 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1277 rv = PR_FALSE;
1278 continue;
1279 }
1280 }
1281
1282 return rv;
1283 }
1284
1285 static PRBool
1286 test_utf16_chars
1287 (
1288 void
1289 )
1290 {
1291 PRBool rv = PR_TRUE;
1292 int i;
1293
1294 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1295 struct utf16 *e = &utf16[i];
1296 PRBool result;
1297 unsigned char utf8[8];
1298 unsigned int len = 0;
1299 PRUint32 back32 = 0;
1300 PRUint16 back[2];
1301
1302 (void)memset(utf8, 0, sizeof(utf8));
1303
1304 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1305 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
1306
1307 if( !result ) {
1308 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n",
1309 e->w[0], e->w[1]);
1310 rv = PR_FALSE;
1311 continue;
1312 }
1313
1314 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1315 utf8, len, (unsigned char *)&back32, sizeof(back32), &len);
1316
1317 if( 4 != len ) {
1318 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
1319 "unexpected len %d\n", e->w[0], e->w[1], len);
1320 rv = PR_FALSE;
1321 continue;
1322 }
1323
1324 utf8[len] = '\0'; /* null-terminate for printing */
1325
1326 if( !result ) {
1327 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
1328 rv = PR_FALSE;
1329 continue;
1330 }
1331
1332 if( (sizeof(back32) != len) || (e->c != back32) ) {
1333 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ",
1334 e->w[0], e->w[1]);
1335 dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
1336 if( sizeof(back32) != len ) {
1337 fprintf(stdout, "len is %d\n", len);
1338 } else {
1339 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
1340 }
1341 rv = PR_FALSE;
1342 continue;
1343 }
1344
1345 (void)memset(utf8, 0, sizeof(utf8));
1346 back[0] = back[1] = 0;
1347
1348 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1349 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1350
1351 if( !result ) {
1352 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n",
1353 e->c);
1354 rv = PR_FALSE;
1355 continue;
1356 }
1357
1358 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1359 utf8, len, (unsigned char *)&back[0], sizeof(back), &len);
1360
1361 if( 4 != len ) {
1362 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
1363 "unexpected len %d\n", e->c, len);
1364 rv = PR_FALSE;
1365 continue;
1366 }
1367
1368 utf8[len] = '\0'; /* null-terminate for printing */
1369
1370 if( !result ) {
1371 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
1372 rv = PR_FALSE;
1373 continue;
1374 }
1375
1376 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
1377 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
1378 dump_utf8("", utf8, "and then to UTF-16:");
1379 if( sizeof(back) != len ) {
1380 fprintf(stdout, "len is %d\n", len);
1381 } else {
1382 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx\n",
1383 e->w[0], e->w[1], back[0], back[1]);
1384 }
1385 rv = PR_FALSE;
1386 continue;
1387 }
1388 }
1389
1390 return rv;
1391 }
1392
1393 static PRBool
1394 test_utf8_bad_chars
1395 (
1396 void
1397 )
1398 {
1399 PRBool rv = PR_TRUE;
1400 int i;
1401
1402 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) {
1403 PRBool result;
1404 unsigned char destbuf[30];
1405 unsigned int len = 0;
1406
1407 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1408 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
1409
1410 if( result ) {
1411 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_bad[i], "\n");
1412 rv = PR_FALSE;
1413 continue;
1414 }
1415 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1416 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
1417
1418 if( result ) {
1419 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_bad[i], "\n");
1420 rv = PR_FALSE;
1421 continue;
1422 }
1423
1424 }
1425
1426 return rv;
1427 }
1428
1429 static PRBool
1430 test_iso88591_chars
1431 (
1432 void
1433 )
1434 {
1435 PRBool rv = PR_TRUE;
1436 int i;
1437
1438 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1439 struct ucs2 *e = &ucs2[i];
1440 PRBool result;
1441 unsigned char iso88591;
1442 unsigned char utf8[3];
1443 unsigned int len = 0;
1444
1445 if (ntohs(e->c) > 0xFF) continue;
1446
1447 (void)memset(utf8, 0, sizeof(utf8));
1448 iso88591 = ntohs(e->c);
1449
1450 result = sec_port_iso88591_utf8_conversion_function(&iso88591,
1451 1, utf8, sizeof(utf8), &len);
1452
1453 if( !result ) {
1454 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso88591);
1455 rv = PR_FALSE;
1456 continue;
1457 }
1458
1459 if( (len >= sizeof(utf8)) ||
1460 (strlen(e->utf8) != len) ||
1461 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1462 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso88591);
1463 dump_utf8("expected", e->utf8, ", ");
1464 dump_utf8("received", utf8, "\n");
1465 rv = PR_FALSE;
1466 continue;
1467 }
1468
1469 }
1470
1471 return rv;
1472 }
1473
1474 static PRBool
1475 test_zeroes
1476 (
1477 void
1478 )
1479 {
1480 PRBool rv = PR_TRUE;
1481 PRBool result;
1482 PRUint32 lzero = 0;
1483 PRUint16 szero = 0;
1484 unsigned char utf8[8];
1485 unsigned int len = 0;
1486 PRUint32 lback = 1;
1487 PRUint16 sback = 1;
1488
1489 (void)memset(utf8, 1, sizeof(utf8));
1490
1491 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1492 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len);
1493
1494 if( !result ) {
1495 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
1496 rv = PR_FALSE;
1497 } else if( 1 != len ) {
1498 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len);
1499 rv = PR_FALSE;
1500 } else if( '\0' != *utf8 ) {
1501 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
1502 "received %02.2x\n", (unsigned int)*utf8);
1503 rv = PR_FALSE;
1504 }
1505
1506 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1507 "", 1, (unsigned char *)&lback, sizeof(lback), &len);
1508
1509 if( !result ) {
1510 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
1511 rv = PR_FALSE;
1512 } else if( 4 != len ) {
1513 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len);
1514 rv = PR_FALSE;
1515 } else if( 0 != lback ) {
1516 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
1517 "expected 0x00000000, received 0x%08.8x\n", lback);
1518 rv = PR_FALSE;
1519 }
1520
1521 (void)memset(utf8, 1, sizeof(utf8));
1522
1523 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1524 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len);
1525
1526 if( !result ) {
1527 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
1528 rv = PR_FALSE;
1529 } else if( 1 != len ) {
1530 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len);
1531 rv = PR_FALSE;
1532 } else if( '\0' != *utf8 ) {
1533 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
1534 "received %02.2x\n", (unsigned int)*utf8);
1535 rv = PR_FALSE;
1536 }
1537
1538 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1539 "", 1, (unsigned char *)&sback, sizeof(sback), &len);
1540
1541 if( !result ) {
1542 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
1543 rv = PR_FALSE;
1544 } else if( 2 != len ) {
1545 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len);
1546 rv = PR_FALSE;
1547 } else if( 0 != sback ) {
1548 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
1549 "expected 0x0000, received 0x%04.4x\n", sback);
1550 rv = PR_FALSE;
1551 }
1552
1553 return rv;
1554 }
1555
1556 static PRBool
1557 test_multichars
1558 (
1559 void
1560 )
1561 {
1562 int i;
1563 unsigned int len, lenout;
1564 PRUint32 *ucs4s;
1565 char *ucs4_utf8;
1566 PRUint16 *ucs2s;
1567 char *ucs2_utf8;
1568 void *tmp;
1569 PRBool result;
1570
1571 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
1572 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
1573
1574 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
1575 fprintf(stderr, "out of memory\n");
1576 exit(1);
1577 }
1578
1579 len = 0;
1580 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1581 ucs4s[i] = ucs4[i].c;
1582 len += strlen(ucs4[i].utf8);
1583 }
1584
1585 ucs4_utf8 = (char *)malloc(len);
1586
1587 len = 0;
1588 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1589 ucs2s[i] = ucs2[i].c;
1590 len += strlen(ucs2[i].utf8);
1591 }
1592
1593 ucs2_utf8 = (char *)malloc(len);
1594
1595 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
1596 fprintf(stderr, "out of memory\n");
1597 exit(1);
1598 }
1599
1600 *ucs4_utf8 = '\0';
1601 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1602 strcat(ucs4_utf8, ucs4[i].utf8);
1603 }
1604
1605 *ucs2_utf8 = '\0';
1606 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1607 strcat(ucs2_utf8, ucs2[i].utf8);
1608 }
1609
1610 /* UTF-8 -> UCS-4 */
1611 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
1612 tmp = calloc(len, 1);
1613 if( (void *)NULL == tmp ) {
1614 fprintf(stderr, "out of memory\n");
1615 exit(1);
1616 }
1617
1618 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1619 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
1620 if( !result ) {
1621 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
1622 goto done;
1623 }
1624
1625 if( lenout != len ) {
1626 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
1627 goto loser;
1628 }
1629
1630 if( 0 != memcmp(ucs4s, tmp, len) ) {
1631 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
1632 goto loser;
1633 }
1634
1635 free(tmp); tmp = (void *)NULL;
1636
1637 /* UCS-4 -> UTF-8 */
1638 len = strlen(ucs4_utf8);
1639 tmp = calloc(len, 1);
1640 if( (void *)NULL == tmp ) {
1641 fprintf(stderr, "out of memory\n");
1642 exit(1);
1643 }
1644
1645 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1646 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32),
1647 tmp, len, &lenout);
1648 if( !result ) {
1649 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
1650 goto done;
1651 }
1652
1653 if( lenout != len ) {
1654 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
1655 goto loser;
1656 }
1657
1658 if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
1659 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
1660 goto loser;
1661 }
1662
1663 free(tmp); tmp = (void *)NULL;
1664
1665 /* UTF-8 -> UCS-2 */
1666 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
1667 tmp = calloc(len, 1);
1668 if( (void *)NULL == tmp ) {
1669 fprintf(stderr, "out of memory\n");
1670 exit(1);
1671 }
1672
1673 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1674 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
1675 if( !result ) {
1676 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
1677 goto done;
1678 }
1679
1680 if( lenout != len ) {
1681 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
1682 goto loser;
1683 }
1684
1685 if( 0 != memcmp(ucs2s, tmp, len) ) {
1686 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
1687 goto loser;
1688 }
1689
1690 free(tmp); tmp = (void *)NULL;
1691
1692 /* UCS-2 -> UTF-8 */
1693 len = strlen(ucs2_utf8);
1694 tmp = calloc(len, 1);
1695 if( (void *)NULL == tmp ) {
1696 fprintf(stderr, "out of memory\n");
1697 exit(1);
1698 }
1699
1700 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1701 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16),
1702 tmp, len, &lenout);
1703 if( !result ) {
1704 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
1705 goto done;
1706 }
1707
1708 if( lenout != len ) {
1709 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
1710 goto loser;
1711 }
1712
1713 if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
1714 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
1715 goto loser;
1716 }
1717
1718 /* implement UTF16 */
1719
1720 result = PR_TRUE;
1721 goto done;
1722
1723 loser:
1724 result = PR_FALSE;
1725 done:
1726 free(ucs4s);
1727 free(ucs4_utf8);
1728 free(ucs2s);
1729 free(ucs2_utf8);
1730 if( (void *)NULL != tmp ) free(tmp);
1731 return result;
1732 }
1733
1734 void
1735 byte_order
1736 (
1737 void
1738 )
1739 {
1740 /*
1741 * The implementation (now) expects the 16- and 32-bit characters
1742 * to be in network byte order, not host byte order. Therefore I
1743 * have to byteswap all those test vectors above. hton[ls] may be
1744 * functions, so I have to do this dynamically. If you want to
1745 * use this code to do host byte order conversions, just remove
1746 * the call in main() to this function.
1747 */
1748
1749 int i;
1750
1751 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1752 struct ucs4 *e = &ucs4[i];
1753 e->c = htonl(e->c);
1754 }
1755
1756 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1757 struct ucs2 *e = &ucs2[i];
1758 e->c = htons(e->c);
1759 }
1760
1761 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1762 struct utf16 *e = &utf16[i];
1763 e->c = htonl(e->c);
1764 e->w[0] = htons(e->w[0]);
1765 e->w[1] = htons(e->w[1]);
1766 }
1767
1768 return;
1769 }
1770
1771 int
1772 main
1773 (
1774 int argc,
1775 char *argv[]
1776 )
1777 {
1778 byte_order();
1779
1780 if( test_ucs4_chars() &&
1781 test_ucs2_chars() &&
1782 test_utf16_chars() &&
1783 test_utf8_bad_chars() &&
1784 test_iso88591_chars() &&
1785 test_zeroes() &&
1786 test_multichars() &&
1787 PR_TRUE ) {
1788 fprintf(stderr, "PASS\n");
1789 return 1;
1790 } else {
1791 fprintf(stderr, "FAIL\n");
1792 return 0;
1793 }
1794 }
1795
1796 #endif /* TEST_UTF8 */
This site is hosted by Intevation GmbH (Datenschutzerklärung und Impressum | Privacy Policy and Imprint)