#include <sys/param.h>
#include <sys/utfconv.h>
#include <sys/errno.h>
#include <architecture/byte_order.h>
/* Number of UTF-8 bytes (1, 2 or 3) needed to encode the 16-bit value c. */
#define UCS_TO_UTF8_LEN(c) ((c) < 0x0080 ? 1 : ((c) < 0x0800 ? 2 : 3))
/*
 * Code unit substituted for an embedded NUL when encoding to UTF-8;
 * the decoder maps it back to NUL.
 */
#define UCS_ALT_NULL 0x2400
/* Forward declarations of the file-local helpers defined later in this file. */
static u_int16_t ucs_decompose __P((u_int16_t, u_int16_t *));
static u_int16_t ucs_combine(u_int16_t base, u_int16_t comb);
/*
 * utf8_encodelen - Compute the number of UTF-8 bytes needed to encode a
 * string of 16-bit code units.
 *
 *   ucsp      input code units
 *   ucslen    input length in BYTES (ucslen / 2 code units are examined)
 *   altslash  value counted in place of '/'; when 0, '_' is counted instead
 *   flags     only UTF_REVERSE_ENDIAN is consulted here (byte-swap each
 *             input unit before it is measured)
 *
 * A NUL code unit is measured as UCS_ALT_NULL.  The returned count does not
 * include any terminator.
 *
 * NOTE(review): UTF_DECOMPOSED is not looked at, so the result may be
 * smaller than what utf8_encodestr emits when asked to decompose -- confirm
 * whether callers rely on this.
 */
size_t
utf8_encodelen(ucsp, ucslen, altslash, flags)
const u_int16_t * ucsp;
size_t ucslen;
u_int16_t altslash;
int flags;
{
	int reverse = (flags & UTF_REVERSE_ENDIAN);
	size_t total = 0;
	int remaining;

	for (remaining = ucslen / 2; remaining > 0; remaining--) {
		u_int16_t unit = *ucsp++;

		if (reverse)
			unit = NXSwapShort(unit);

		/* Apply the same substitutions the encoder would make. */
		switch (unit) {
		case '/':
			unit = altslash ? altslash : '_';
			break;
		case '\0':
			unit = UCS_ALT_NULL;
			break;
		default:
			break;
		}

		total += UCS_TO_UTF8_LEN(unit);
	}

	return (total);
}
/*
 * utf8_encodestr - Encode a string of 16-bit code units as UTF-8.
 *
 *   ucsp      input code units
 *   ucslen    input length in BYTES (ucslen / 2 code units are consumed)
 *   utf8p     output buffer
 *   utf8len   out: number of bytes stored, not counting the terminator
 *   buflen    size of the output buffer in bytes
 *   altslash  value written in place of '/'; when 0, '/' is written as '_'
 *             and EINVAL is reported (conversion still continues)
 *   flags     UTF_REVERSE_ENDIAN  byte-swap each input unit first
 *             UTF_NO_NULL_TERM    do not append a terminating NUL
 *             UTF_DECOMPOSED      pass each unit through ucs_decompose and
 *                                 emit the base followed by its marks
 *
 * A NUL code unit is written as UCS_ALT_NULL.  Each unit becomes 1-3 bytes.
 *
 * Returns 0 on success, EINVAL if a '/' had to be replaced by '_', or
 * ENAMETOOLONG if the output did not fit (whatever fit is kept and, unless
 * UTF_NO_NULL_TERM is set, terminated).  When a terminator is required but
 * buflen is 0, nothing is written, *utf8len is 0 and ENAMETOOLONG is
 * returned.
 */
int utf8_encodestr(ucsp, ucslen, utf8p, utf8len, buflen, altslash, flags)
const u_int16_t * ucsp;
size_t ucslen;
u_int8_t * utf8p;
size_t * utf8len;
size_t buflen;
u_int16_t altslash;
int flags;
{
	u_int8_t * bufstart;
	u_int8_t * bufend;
	u_int16_t ucs_ch;
	u_int16_t extra[2] = {0};	/* pending combining marks */
	int charcnt;
	int swapbytes = (flags & UTF_REVERSE_ENDIAN);
	int nullterm = ((flags & UTF_NO_NULL_TERM) == 0);
	int decompose = (flags & UTF_DECOMPOSED);
	int result = 0;

	bufstart = utf8p;
	bufend = bufstart + buflen;
	if (nullterm) {
		/*
		 * Reserve one byte for the terminator.  With an empty buffer
		 * there is nowhere to put it, so bail out instead of moving
		 * bufend in front of the buffer and storing out of bounds.
		 */
		if (buflen == 0) {
			*utf8len = 0;
			return (ENAMETOOLONG);
		}
		--bufend;
	}
	charcnt = ucslen / 2;

	while (charcnt-- > 0) {
		if (!decompose) {
			ucs_ch = *ucsp++;
			if (swapbytes)
				ucs_ch = NXSwapShort(ucs_ch);
		} else if (extra[0]) {
			ucs_ch = extra[0];
			extra[0] = 0;
		} else if (extra[1]) {
			ucs_ch = extra[1];
			extra[1] = 0;
		} else {
			ucs_ch = *ucsp++;
			if (swapbytes)
				ucs_ch = NXSwapShort(ucs_ch);
			ucs_ch = ucs_decompose(ucs_ch, &extra[0]);
			/* each pending mark needs one more trip round the loop */
			if (extra[0])
				charcnt++;
			if (extra[1])
				charcnt++;
		}

		/* '/' and NUL are never written literally */
		if (ucs_ch == '/') {
			if (altslash)
				ucs_ch = altslash;
			else {
				ucs_ch = '_';
				result = EINVAL;
			}
		} else if (ucs_ch == '\0') {
			ucs_ch = UCS_ALT_NULL;
		}

		/*
		 * Space checks compare the remaining room rather than forming
		 * utf8p + k, which could point beyond the end of the buffer.
		 */
		if (ucs_ch < 0x0080) {
			if ((bufend - utf8p) < 1) {
				result = ENAMETOOLONG;
				break;
			}
			*utf8p++ = ucs_ch;
		} else if (ucs_ch < 0x800) {
			if ((bufend - utf8p) < 2) {
				result = ENAMETOOLONG;
				break;
			}
			*utf8p++ = (ucs_ch >> 6) | 0xc0;
			*utf8p++ = (ucs_ch & 0x3f) | 0x80;
		} else {
			if ((bufend - utf8p) < 3) {
				result = ENAMETOOLONG;
				break;
			}
			*utf8p++ = (ucs_ch >> 12) | 0xe0;
			*utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
			*utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
		}
	}

	*utf8len = utf8p - bufstart;
	if (nullterm)
		*utf8p++ = '\0';	/* fits: one byte was reserved above */

	return (result);
}
/*
 * utf8_decodestr - Decode a UTF-8 string into 16-bit code units.
 *
 *   utf8p     input bytes
 *   utf8len   maximum number of input bytes; decoding also stops at the
 *             first NUL byte
 *   ucsp      output buffer
 *   ucslen    out: number of BYTES stored in the output buffer
 *   buflen    size of the output buffer in bytes (an odd trailing byte is
 *             ignored because it cannot hold a code unit)
 *   altslash  when non-zero, a decoded unit equal to this value is stored
 *             as '/'
 *   flags     UTF_REVERSE_ENDIAN  store each unit byte-swapped
 *             UTF_DECOMPOSED      pass non-ASCII units through ucs_decompose
 *             UTF_PRECOMPOSED     try to merge a non-ASCII unit with the
 *                                 previously stored unit via ucs_combine
 *
 * Only 1-, 2- and 3-byte sequences are accepted.  A decoded UCS_ALT_NULL is
 * stored as NUL.
 *
 * Returns 0 on success, EINVAL for a malformed, non-minimal, truncated or
 * unsupported sequence, or ENAMETOOLONG when the output buffer is full.  In
 * every case *ucslen reflects what was stored before stopping.
 */
int
utf8_decodestr(utf8p, utf8len, ucsp, ucslen, buflen, altslash, flags)
const u_int8_t* utf8p;
size_t utf8len;
u_int16_t* ucsp;
size_t *ucslen;
size_t buflen;
u_int16_t altslash;
int flags;
{
	u_int16_t* bufstart;
	u_int16_t* bufend;
	u_int16_t ucs_ch;
	u_int8_t byte;
	int result = 0;
	int decompose, precompose, swapbytes;

	decompose = (flags & UTF_DECOMPOSED);
	precompose = (flags & UTF_PRECOMPOSED);
	swapbytes = (flags & UTF_REVERSE_ENDIAN);

	bufstart = ucsp;
	/* Count whole code units only so an odd buflen cannot be overrun. */
	bufend = ucsp + (buflen / sizeof(u_int16_t));

	while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
		if (ucsp >= bufend) {
			result = ENAMETOOLONG;
			goto stop;
		}

		if (byte < 0x80) {
			/* plain ASCII */
			ucs_ch = byte;
		} else {
			switch (byte & 0xf0) {
			case 0xc0:
			case 0xd0:
				/* 2-byte sequence: bits 6-10 come from the lead byte */
				ucs_ch = (byte & 0x1F) << 6;
				if (ucs_ch < 0x0080) {
					result = EINVAL;	/* not the shortest form */
					goto stop;
				}
				break;
			case 0xe0:
				/* 3-byte sequence: bits 12-15 come from the lead byte */
				ucs_ch = (byte & 0x0F) << 6;

				/* the middle byte must exist within utf8len */
				if (utf8len == 0) {
					result = EINVAL;	/* truncated sequence */
					goto stop;
				}
				if (((byte = *utf8p++) & 0xc0) != 0x80) {
					result = EINVAL;
					goto stop;
				}
				utf8len--;
				ucs_ch += (byte & 0x3F);
				ucs_ch <<= 6;
				if (ucs_ch < 0x0800) {
					result = EINVAL;	/* not the shortest form */
					goto stop;
				}
				break;
			default:
				/* stray continuation byte or a 4+ byte lead */
				result = EINVAL;
				goto stop;
			}

			/*
			 * Final continuation byte (bits 0-5).  Check the remaining
			 * length first: reading past it would over-read the input
			 * and wrap the unsigned counter.
			 */
			if (utf8len == 0) {
				result = EINVAL;	/* truncated sequence */
				goto stop;
			}
			if (((byte = *utf8p++) & 0xc0) != 0x80) {
				result = EINVAL;
				goto stop;
			}
			utf8len--;
			ucs_ch += (byte & 0x3F);

			if (decompose) {
				u_int16_t comb_ch[2];

				ucs_ch = ucs_decompose(ucs_ch, &comb_ch[0]);

				/*
				 * Store everything except the last unit here; the
				 * last one falls through to the common store below.
				 */
				if (comb_ch[0]) {
					*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
					if (ucsp >= bufend) {
						result = ENAMETOOLONG;
						goto stop;
					}
					ucs_ch = comb_ch[0];
					if (comb_ch[1]) {
						*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
						if (ucsp >= bufend) {
							result = ENAMETOOLONG;
							goto stop;
						}
						ucs_ch = comb_ch[1];
					}
				}
			} else if (precompose && (ucsp != bufstart)) {
				u_int16_t composite, base;

				/* try to fold this unit into the one stored just before */
				base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
				composite = ucs_combine(base, ucs_ch);
				if (composite) {
					--ucsp;		/* overwrite the previous unit */
					ucs_ch = composite;
				}
			}
			if (ucs_ch == UCS_ALT_NULL)
				ucs_ch = '\0';
		}

		/*
		 * Only translate when an alternate slash is configured; with
		 * altslash == 0 a NUL restored from UCS_ALT_NULL would otherwise
		 * compare equal and be turned into '/'.
		 */
		if (altslash && ucs_ch == altslash)
			ucs_ch = '/';
		if (swapbytes)
			ucs_ch = NXSwapShort(ucs_ch);

		*ucsp++ = ucs_ch;
	}
stop:
	*ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart;

	return (result);
}
/*
 * Base characters for the code points 0x00C0..0x01DF, indexed by
 * (code point - 0x00C0).  A zero entry means the code point has no
 * decomposition here.  A non-zero entry is the base character; it may itself
 * be >= 0x00C0 (e.g. 0xDC), in which case ucs_decompose looks it up a second
 * time to produce two combining marks.
 *
 * Read-only lookup data, hence const.
 */
static const unsigned char primary_char[8*36] = {
0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x00, 0x43,
0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49,
0x00, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x4F, 0x00,
0x00, 0x55, 0x55, 0x55, 0x55, 0x59, 0x00, 0x00,
0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x00, 0x63,
0x65, 0x65, 0x65, 0x65, 0x69, 0x69, 0x69, 0x69,
0x00, 0x6E, 0x6F, 0x6F, 0x6F, 0x6F, 0x6F, 0x00,
0x00, 0x75, 0x75, 0x75, 0x75, 0x79, 0x00, 0x79,
0x41, 0x61, 0x41, 0x61, 0x41, 0x61, 0x43, 0x63,
0x43, 0x63, 0x43, 0x63, 0x43, 0x63, 0x44, 0x64,
0x00, 0x00, 0x45, 0x65, 0x45, 0x65, 0x45, 0x65,
0x45, 0x65, 0x45, 0x65, 0x47, 0x67, 0x47, 0x67,
0x47, 0x67, 0x47, 0x67, 0x48, 0x68, 0x00, 0x00,
0x49, 0x69, 0x49, 0x69, 0x49, 0x69, 0x49, 0x69,
0x49, 0x00, 0x00, 0x00, 0x4A, 0x6A, 0x4B, 0x6B,
0x00, 0x4C, 0x6C, 0x4C, 0x6C, 0x4C, 0x6C, 0x00,
0x00, 0x00, 0x00, 0x4E, 0x6E, 0x4E, 0x6E, 0x4E,
0x6E, 0x00, 0x00, 0x00, 0x4F, 0x6F, 0x4F, 0x6F,
0x4F, 0x6F, 0x00, 0x00, 0x52, 0x72, 0x52, 0x72,
0x52, 0x72, 0x53, 0x73, 0x53, 0x73, 0x53, 0x73,
0x53, 0x73, 0x54, 0x74, 0x54, 0x74, 0x00, 0x00,
0x55, 0x75, 0x55, 0x75, 0x55, 0x75, 0x55, 0x75,
0x55, 0x75, 0x55, 0x75, 0x57, 0x77, 0x59, 0x79,
0x59, 0x5A, 0x7A, 0x5A, 0x7A, 0x5A, 0x7A, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x4F, 0x6F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55,
0x75, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x49,
0x69, 0x4F, 0x6F, 0x55, 0x75, 0xDC, 0xFC, 0xDC,
0xFC, 0xDC, 0xFC, 0xDC, 0xFC, 0x00, 0xC4, 0xE4
};
/*
 * Combining marks for the code points 0x00C0..0x01DF, indexed by
 * (code point - 0x00C0) in parallel with primary_char.  Each entry is the
 * offset added to 0x0300 to obtain the combining mark.  ucs_decompose only
 * reads an entry when the matching primary_char entry is non-zero.
 *
 * Read-only lookup data, hence const.
 */
static const unsigned char combining_char[8*36] = {
0x00, 0x01, 0x02, 0x03, 0x08, 0x0A, 0xFF, 0x27,
0x00, 0x01, 0x02, 0x08, 0x00, 0x01, 0x02, 0x08,
0xFF, 0x03, 0x00, 0x01, 0x02, 0x03, 0x08, 0xFF,
0xFF, 0x00, 0x01, 0x02, 0x08, 0x01, 0xFF, 0xFF,
0x00, 0x01, 0x02, 0x03, 0x08, 0x0A, 0xFF, 0x27,
0x00, 0x01, 0x02, 0x08, 0x00, 0x01, 0x02, 0x08,
0xFF, 0x03, 0x00, 0x01, 0x02, 0x03, 0x08, 0xFF,
0xFF, 0x00, 0x01, 0x02, 0x08, 0x01, 0xFF, 0x08,
0x04, 0x04, 0x06, 0x06, 0x28, 0x28, 0x01, 0x01,
0x02, 0x02, 0x07, 0x07, 0x0C, 0x0C, 0x0C, 0x0C,
0x00, 0x00, 0x04, 0x04, 0x06, 0x06, 0x07, 0x07,
0x28, 0x28, 0x0C, 0x0C, 0x02, 0x02, 0x06, 0x06,
0x07, 0x07, 0x27, 0x27, 0x02, 0x02, 0x00, 0x00,
0x03, 0x03, 0x04, 0x04, 0x06, 0x06, 0x28, 0x28,
0x07, 0x00, 0x00, 0x00, 0x02, 0x02, 0x27, 0x27,
0x00, 0x01, 0x01, 0x27, 0x27, 0x0C, 0x0C, 0x00,
0x00, 0x00, 0x00, 0x01, 0x01, 0x27, 0x27, 0x0C,
0x0C, 0x00, 0x00, 0x00, 0x04, 0x04, 0x06, 0x06,
0x0B, 0x0B, 0x00, 0x00, 0x01, 0x01, 0x27, 0x27,
0x0C, 0x0C, 0x01, 0x01, 0x02, 0x02, 0x27, 0x27,
0x0C, 0x0C, 0x27, 0x27, 0x0C, 0x0C, 0x00, 0x00,
0x03, 0x03, 0x04, 0x04, 0x06, 0x06, 0x0A, 0x0A,
0x0B, 0x0B, 0x28, 0x28, 0x02, 0x02, 0x02, 0x02,
0x08, 0x01, 0x01, 0x07, 0x07, 0x0C, 0x0C, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x1B, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1B,
0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x0C,
0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x04, 0x04, 0x01,
0x01, 0x0C, 0x0C, 0x00, 0x00, 0x00, 0x04, 0x04
};
/*
 * Bitmap with eight words per table, consulted through IS_DECOMPOSABLE.
 * ucs_decompose indexes it with (code point - 0x0400) for code points
 * 0x0400..0x04FF; a set bit sends that code point into the Cyrillic
 * decomposition switch.
 */
static const unsigned long __CyrillicDecompBitmap[] = {
0x40000040, 0x00000040, 0x00004000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
};
/*
 * Same layout, indexed with (code point - 0x3000) for code points strictly
 * between 0x3000 and 0x3100; a set bit sends that code point into the
 * 0x3099 / 0x309A decomposition switch in ucs_decompose.
 */
static const unsigned long __CJKDecompBitmap[] = {
0x00000000, 0x00000000, 0x000AAAAA, 0xA540DB6C,
0x00000802, 0x000AAAAA, 0xA540DB6C, 0x000009E2,
};
/*
 * Non-zero when the bit for offset unicodeVal is set in a bitmap table.
 * Word unicodeVal / 32 is selected and, within it, offset 0 maps to the most
 * significant of the 32 bits.  The mask is built from an unsigned long so
 * that shifting into bit 31 (offsets that are multiples of 32) is well
 * defined; shifting a signed int 1 there is not.
 */
#define IS_DECOMPOSABLE(table,unicodeVal) \
	((table)[(unicodeVal) / 32] & (1UL << (31 - ((unicodeVal) % 32))))
/*
 * ucs_decompose - Split a precomposed 16-bit code unit into a base character
 * and up to two combining marks.
 *
 *   ch   code unit to decompose
 *   cmb  out: two-element array; unused slots are set to 0
 *
 * Returns the base character, or ch itself (with cmb[] zeroed) when this
 * routine has no decomposition for it.
 *
 * Ranges handled: 0x00C0..0x01DF (table driven), selected code points in
 * 0x0400..0x04FF, 0x1E3F, selected code points between 0x3000 and 0x3100,
 * and 0xAC00..0xD7A3 (computed arithmetically).
 */
static u_int16_t
ucs_decompose(register u_int16_t ch, u_int16_t *cmb)
{
	cmb[0] = 0;
	cmb[1] = 0;

	/* Everything below 0x00C0 is returned untouched. */
	if (ch < 0x00C0)
		return (ch);

	/* Table-driven range. */
	if (ch <= 0x01DF) {
		int slot = ch - 0x00C0;
		u_int16_t first = (u_int16_t)primary_char[slot];

		if (first == 0)
			return (ch);

		/* The base does not decompose further: one mark only. */
		if (first < 0x00C0 || primary_char[first - 0x00C0] == 0) {
			cmb[0] = (u_int16_t)0x0300 + (u_int16_t)combining_char[slot];
			return (first);
		}

		/*
		 * The base is itself precomposed: its own mark comes first,
		 * followed by the mark of the original character.
		 */
		cmb[0] = (u_int16_t)0x0300 + (u_int16_t)combining_char[first - 0x00C0];
		cmb[1] = (u_int16_t)0x0300 + (u_int16_t)combining_char[slot];
		return ((u_int16_t)primary_char[first - 0x00C0]);
	}

	/* 0x0400..0x04FF, gated by the bitmap. */
	if (ch >= 0x0400 && ch <= 0x04FF &&
	    IS_DECOMPOSABLE(__CyrillicDecompBitmap, ch - 0x0400)) {
		switch (ch) {
		case 0x0401:
			cmb[0] = 0x0308;
			return (0x0415);
		case 0x0419:
			cmb[0] = 0x0306;
			return (0x0418);
		case 0x0439:
			cmb[0] = 0x0306;
			return (0x0438);
		case 0x0451:
			cmb[0] = 0x0308;
			return (0x0435);
		default:
			return (ch);
		}
	}

	if (ch == 0x1E3F) {
		cmb[0] = 0x0301;
		return (0x006D);
	}

	/* Between 0x3000 and 0x3100, gated by the bitmap. */
	if (ch > 0x3000 && ch < 0x3100 &&
	    IS_DECOMPOSABLE(__CJKDecompBitmap, ch - 0x3000)) {
		switch (ch) {
		/* mark 0x309A: the base sits two code points lower */
		case 0x3071:
		case 0x3074:
		case 0x3077:
		case 0x307A:
		case 0x307D:
		case 0x30D1:
		case 0x30D4:
		case 0x30D7:
		case 0x30DA:
		case 0x30DD:
			cmb[0] = 0x309A;
			return (ch - 2);

		/* mark 0x3099 with an irregular base */
		case 0x3094:
			cmb[0] = 0x3099;
			return (0x3046);
		case 0x30F4:
			cmb[0] = 0x3099;
			return (0x30A6);
		case 0x30F7:
		case 0x30F8:
		case 0x30F9:
		case 0x30FA:
			cmb[0] = 0x3099;
			return (ch - 8);

		/* mark 0x3099: the base is the preceding code point */
		default:
			cmb[0] = 0x3099;
			return (ch - 1);
		}
	}

	/* 0xAC00..0xD7A3: base 0x1100+, marks 0x1161+ and optionally 0x11A7+. */
	if (ch >= 0xAC00 && ch < 0xD7A4) {
		u_int16_t sidx = ch - 0xAC00;

		cmb[0] = 0x1161 + (sidx % (21*28)) / 28;
		if (sidx % 28)
			cmb[1] = 0x11A7 + (sidx % 28);
		return (0x1100 + (sidx / (21*28)));
	}

	return (ch);
}
/*
 * Indexed by (combining mark - 0x0300) for marks 0x0300..0x032F.  Each
 * non-negative entry is the offset in composite_tbl of that mark's group of
 * 58 entries; -1 means ucs_combine has no table compositions for the mark.
 */
static const short diacrit_tbl[8*6] = {
0, 58, 116, 174, 232, -1, 290, 348,
406, -1, 464, 522, 580, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, 638, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, 696,
754, -1, -1, -1, -1, -1, -1, -1
};
/*
 * Precomposed results for (base letter, combining mark) pairs.  The table is
 * 14 groups of 58 entries; a group starts at the offset given by diacrit_tbl
 * and is indexed by (base - 'A') for bases 'A'..'z' (26 upper-case letters,
 * the 6 characters between 'Z' and 'a', then 26 lower-case letters).  A zero
 * entry means the pair has no composite.
 */
static const u_int16_t composite_tbl[58*14] = {
0x0C0, 0, 0, 0,0x0C8, 0, 0, 0,0x0CC, 0, 0, 0, 0,
0,0x0D2, 0, 0, 0, 0, 0,0x0D9, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0x0E0, 0, 0, 0,0x0E8, 0, 0, 0,0x0EC, 0, 0, 0, 0,
0,0x0F2, 0, 0, 0, 0, 0,0x0F9, 0, 0, 0, 0, 0,
0x0C1, 0,0x106, 0,0x0C9, 0, 0, 0,0x0CD, 0, 0,0x139, 0,
0x143,0x0D3, 0, 0,0x154,0x15A, 0,0x0DA, 0, 0, 0,0x0DD,0x179,
0, 0, 0, 0, 0, 0,
0x0E1, 0,0x107, 0,0x0E9, 0, 0, 0,0x0ED, 0, 0,0x13A,0x1E3F,
0x144,0x0F3, 0, 0,0x155,0x15B, 0,0x0FA, 0, 0, 0,0x0FD,0x17A,
0x0C2, 0,0x108, 0,0x0CA, 0,0x11C,0x124,0x0CE,0x134, 0, 0, 0,
0,0x0D4, 0, 0, 0,0x15C, 0,0x0DB, 0,0x174, 0,0x176, 0,
0, 0, 0, 0, 0, 0,
0x0E2, 0,0x109, 0,0x0EA, 0,0x11D,0x125,0x0EE,0x135, 0, 0, 0,
0,0x0F4, 0, 0, 0,0x15D, 0,0x0FB, 0,0x175, 0,0x177, 0,
0x0C3, 0, 0, 0, 0, 0, 0, 0,0x128, 0, 0, 0, 0,
0x0D1,0x0D5, 0, 0, 0, 0, 0,0x168, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0x0E3, 0, 0, 0, 0, 0, 0, 0,0x129, 0, 0, 0, 0,
0x0F1,0x0F5, 0, 0, 0, 0, 0,0x169, 0, 0, 0, 0, 0,
0x100, 0, 0, 0,0x112, 0, 0, 0,0x12A, 0, 0, 0, 0,
0,0x14C, 0, 0, 0, 0, 0,0x16A, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0x101, 0, 0, 0,0x113, 0, 0, 0,0x12B, 0, 0, 0, 0,
0,0x14D, 0, 0, 0, 0, 0,0x16B, 0, 0, 0, 0, 0,
0x102, 0, 0, 0,0x114, 0,0x11E, 0,0x12C, 0, 0, 0, 0,
0,0x14E, 0, 0, 0, 0, 0,0x16C, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0x103, 0, 0, 0,0x115, 0,0x11F, 0,0x12D, 0, 0, 0, 0,
0,0x14F, 0, 0, 0, 0, 0,0x16D, 0, 0, 0, 0, 0,
0, 0,0x10A, 0,0x116, 0,0x120, 0,0x130, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0x17B,
0, 0, 0, 0, 0, 0,
0, 0,0x10B, 0,0x117, 0,0x121, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0x17C,
0x0C4, 0, 0, 0,0x0CB, 0, 0, 0,0x0CF, 0, 0, 0, 0,
0,0x0D6, 0, 0, 0, 0, 0,0x0DC, 0, 0, 0,0x178, 0,
0, 0, 0, 0, 0, 0,
0x0E4, 0, 0, 0,0x0EB, 0, 0, 0,0x0EF, 0, 0, 0, 0,
0,0x0F6, 0, 0, 0, 0, 0,0x0FC, 0, 0, 0,0x0FF, 0,
0x0C5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,0x16E, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0x0E5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,0x16F, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,0x150, 0, 0, 0, 0, 0,0x170, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,0x151, 0, 0, 0, 0, 0,0x171, 0, 0, 0, 0, 0,
0x1CD, 0,0x10C,0x10E,0x11A, 0, 0, 0,0x1CF, 0, 0,0x13D, 0,
0x147,0x1D1, 0, 0,0x158,0x160,0x164,0x1D3, 0, 0, 0, 0,0x17D,
0, 0, 0, 0, 0, 0,
0x1CE, 0,0x10D,0x10F,0x11B, 0, 0, 0,0x1D0, 0, 0,0x13E, 0,
0x148,0x1D2, 0, 0,0x159,0x161,0x165,0x1D4, 0, 0, 0, 0,0x17E,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,0x1A0, 0, 0, 0, 0, 0,0x1AF, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,0x1A1, 0, 0, 0, 0, 0,0x1B0, 0, 0, 0, 0, 0,
0, 0,0x0C7, 0, 0, 0,0x122, 0, 0, 0,0x136,0x13B, 0,
0x145, 0, 0, 0,0x156,0x15E,0x162, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0,0x0E7, 0, 0, 0,0x123, 0, 0, 0,0x137,0x13C, 0,
0x146, 0, 0, 0,0x157,0x15F,0x163, 0, 0, 0, 0, 0, 0,
0x104, 0, 0, 0,0x118, 0, 0, 0,0x12E, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,0x172, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0x105, 0, 0, 0,0x119, 0, 0, 0,0x12F, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,0x173, 0, 0, 0, 0, 0,
};
/*
 * Eight-word bitmap consulted through CAN_COMBINE.  ucs_combine indexes it
 * with (base - 0x3000) for bases strictly between 0x3000 and 0x3100; a set
 * bit lets that base be considered for combination with 0x3099 or 0x309A.
 */
static const unsigned long __CJKCombBitmap[] = {
0x00000000, 0x00000000, 0x02155555, 0x4A812490,
0x00000004, 0x02155555, 0x4A812490, 0x0001E004,
};
/*
 * Non-zero when the bit for offset unicodeVal is set in a bitmap table.
 * Word unicodeVal / 32 is selected and, within it, offset 0 maps to the most
 * significant of the 32 bits.  The mask is built from an unsigned long so
 * that shifting into bit 31 (offsets that are multiples of 32) is well
 * defined; shifting a signed int 1 there is not.
 */
#define CAN_COMBINE(table,unicodeVal) \
	((table)[(unicodeVal) / 32] & (1UL << (31 - ((unicodeVal) % 32))))
/*
 * ucs_combine - Merge a base character with a following combining mark.
 *
 *   base  the character stored before the mark
 *   comb  the candidate combining mark
 *
 * Returns the precomposed code unit, or 0 when this routine has no
 * composite for the pair (the caller then keeps both units).
 *
 * Marks handled: 0x0300..0x032F (table lookup for bases 'A'..'z', plus a
 * short list of other bases), 0x1161..0x1175 and 0x11A8..0x11C2 (computed
 * arithmetically), and 0x3099 / 0x309A.
 */
static u_int16_t
ucs_combine(u_int16_t base, u_int16_t comb)
{
	/* Nothing below 0x0300 is treated as a combining mark. */
	if (comb < 0x0300)
		return (0);

	if (comb <= 0x032F) {
		/* Pairs whose base lies outside 'A'..'z'. */
		static const struct {
			u_int16_t mark;
			u_int16_t letter;
			u_int16_t result;
		} special[] = {
			{ 0x0300, 0x00DC, 0x01DB }, { 0x0300, 0x00FC, 0x01DC },
			{ 0x0301, 0x00DC, 0x01D7 }, { 0x0301, 0x00FC, 0x01D8 },
			{ 0x0304, 0x00DC, 0x01D5 }, { 0x0304, 0x00FC, 0x01D6 },
			{ 0x0304, 0x00C4, 0x01DE }, { 0x0304, 0x00E4, 0x01DF },
			{ 0x0306, 0x0418, 0x0419 }, { 0x0306, 0x0438, 0x0439 },
			{ 0x0308, 0x0415, 0x0401 }, { 0x0308, 0x0435, 0x0451 },
			{ 0x030C, 0x00DC, 0x01D9 }, { 0x030C, 0x00FC, 0x01DA },
		};
		unsigned int i;

		/* Bases 'A'..'z' go through the two-level table. */
		if (base >= 'A' && base <= 'z') {
			int group = diacrit_tbl[comb - 0x0300];

			if (group < 0)
				return (0);
			return (composite_tbl[group + (base - 'A')]);
		}

		for (i = 0; i < sizeof(special) / sizeof(special[0]); i++) {
			if (special[i].mark == comb && special[i].letter == base)
				return (special[i].result);
		}
		return (0);
	}

	/* No marks are handled between 0x0330 and 0x1160. */
	if (comb < 0x1161)
		return (0);

	/* Base 0x1100..0x1112 followed by mark 0x1161..0x1175. */
	if ((comb <= 0x1175) && (base >= 0x1100 && base <= 0x1112))
		return (0xAC00 + ((base - 0x1100)*(21*28)) + ((comb - 0x1161)*28));

	/*
	 * Base 0xAC00..0xD788 followed by mark 0x11A8..0x11C2; only bases at
	 * a multiple of 28 from 0xAC00 can take the mark.
	 */
	if ((comb >= 0x11A8 && comb <= 0x11C2) &&
	    (base >= 0xAC00 && base <= 0xD788)) {
		if ((base - 0xAC00) % 28)
			return (0);
		return (base + (comb - 0x11A7));
	}

	/* Marks 0x3099 / 0x309A on bases flagged in the bitmap. */
	if ((comb == 0x3099 || comb == 0x309A) &&
	    (base > 0x3000 && base < 0x3100) &&
	    CAN_COMBINE(__CJKCombBitmap, base - 0x3000)) {
		if (comb == 0x309A) {
			switch (base) {
			/* the composite sits two code points above the base */
			case 0x306F:
			case 0x3072:
			case 0x3075:
			case 0x3078:
			case 0x307B:
			case 0x30CF:
			case 0x30D2:
			case 0x30D5:
			case 0x30D8:
			case 0x30DB:
				return (base + 2);
			default:
				return (0);
			}
		}

		/* comb == 0x3099 */
		switch (base) {
		case 0x3046:
			return (0x3094);
		case 0x30A6:
			return (0x30F4);
		case 0x30EF:
		case 0x30F0:
		case 0x30F1:
		case 0x30F2:
			return (base + 8);
		default:
			/* the composite is the next code point */
			return (base + 1);
		}
	}

	return (0);
}