#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include "Xlibint.h"
#include "XlcPubI.h"
#include "XlcGeneric.h"
/*
 * Allocate a bare converter record bound to the given method table.
 *
 * lcd is accepted for signature symmetry but is not read here.
 * Returns NULL when the allocation fails; otherwise the record's
 * methods field is set and its state field is NULL.
 */
static XlcConv
create_conv(
    XLCd lcd,
    XlcConvMethods methods)
{
    XlcConv record = (XlcConv) Xmalloc(sizeof(XlcConvRec));

    if (record != (XlcConv) NULL) {
        record->methods = methods;
        record->state = NULL;
    }
    return record;
}
/*
 * Close method used by the method tables in this file whose converters
 * are built with create_conv(): frees the converter record itself.
 */
static void
close_converter(
XlcConv conv)
{
Xfree((char *) conv);
}
/* Character code written in place of input that cannot be converted (0xFFFD). */
#define BAD_WCHAR ((ucs4_t) 0xfffd)
/* Byte written in place of a character that an 8-bit output cannot hold. */
#define BAD_CHAR '?'
/* Character code type passed to and from the per-charset conversion functions. */
typedef unsigned int ucs4_t;
/* Alias for XlcConv; presumably the name the lcUniConv headers included below expect -- TODO confirm. */
#define conv_t XlcConv
/*
 * One entry of the charset table: a charset name, its quark (filled in
 * by init_all_charsets()), and the pair of per-character conversion
 * functions for that charset.
 */
typedef struct _Utf8ConvRec {
/* Charset encoding name, compared with strcmp() against XlcCharSet names. */
const char *name;
/* Quark of name; NULLQUARK until init_all_charsets() has run. */
XrmQuark xrm_name;
/* Decode one character from the byte string into *ucs4_t; returns bytes consumed or a RET_* code. */
int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
/* Encode one character into the byte buffer; returns bytes written or a RET_* code. */
int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
} Utf8ConvRec, *Utf8Conv;
/*
 * Special return values of the cstowc/wctocs functions, as tested by the
 * converters in this file.  Note that RET_TOOFEW(0) and RET_TOOSMALL
 * are the same value (-1); which one applies depends on the direction
 * of the function that returned it.
 */
/* The character is invalid or not representable in the charset. */
#define RET_ILSEQ 0
/* Decoding direction: the input ended before a whole character was read. */
#define RET_TOOFEW(n) (-1-(n))
/* Encoding direction: the output buffer has no room for the character. */
#define RET_TOOSMALL -1
#include "lcUniConv/utf8.h"
#include "lcUniConv/ucs2be.h"
#ifdef notused
#include "lcUniConv/ascii.h"
#endif
#include "lcUniConv/iso8859_1.h"
#include "lcUniConv/iso8859_2.h"
#include "lcUniConv/iso8859_3.h"
#include "lcUniConv/iso8859_4.h"
#include "lcUniConv/iso8859_5.h"
#include "lcUniConv/iso8859_6.h"
#include "lcUniConv/iso8859_7.h"
#include "lcUniConv/iso8859_8.h"
#include "lcUniConv/iso8859_9.h"
#include "lcUniConv/iso8859_10.h"
#include "lcUniConv/iso8859_11.h"
#include "lcUniConv/iso8859_13.h"
#include "lcUniConv/iso8859_14.h"
#include "lcUniConv/iso8859_15.h"
#include "lcUniConv/iso8859_16.h"
#include "lcUniConv/iso8859_9e.h"
#include "lcUniConv/jisx0201.h"
#include "lcUniConv/tis620.h"
#include "lcUniConv/koi8_r.h"
#include "lcUniConv/koi8_u.h"
#include "lcUniConv/koi8_c.h"
#include "lcUniConv/armscii_8.h"
#include "lcUniConv/cp1133.h"
#include "lcUniConv/mulelao.h"
#include "lcUniConv/viscii.h"
#include "lcUniConv/tcvn.h"
#include "lcUniConv/georgian_academy.h"
#include "lcUniConv/georgian_ps.h"
#include "lcUniConv/cp1251.h"
#include "lcUniConv/cp1255.h"
#include "lcUniConv/cp1256.h"
#include "lcUniConv/tatar_cyr.h"
/*
 * Two-field record of 16-bit values.  Not referenced directly in this
 * file; presumably used by the lookup tables in the multi-byte charset
 * headers included right after it -- TODO confirm.
 */
typedef struct {
unsigned short indx;
unsigned short used;
} Summary16;
#include "lcUniConv/gb2312.h"
#include "lcUniConv/jisx0208.h"
#include "lcUniConv/jisx0212.h"
#include "lcUniConv/ksc5601.h"
#include "lcUniConv/big5.h"
#include "lcUniConv/big5_emacs.h"
/*
 * Table of all charsets known to the converters in this file.
 *
 * Entry order matters to the code below:
 *  - entry 0 (ISO10646-1 as UTF-8) is the one lazy_init_all_charsets()
 *    tests and the one create_ucstocs_conv() selects for UTF-8 locales;
 *    the fallback scan in charset_wctocs() starts at entry 1;
 *  - lookups by name are bounded by all_charsets_count-1 and therefore
 *    never reach the last two entries;
 *  - the last entry is addressed through ucs2_conv_index.
 * The xrm_name fields start as NULLQUARK and are filled in by
 * init_all_charsets().
 */
static Utf8ConvRec all_charsets[] = {
/* ISO10646-1 encoded as UTF-8; must remain the first entry (see above). */
{ "ISO10646-1", NULLQUARK,
utf8_mbtowc, utf8_wctomb
},
{ "ISO8859-1", NULLQUARK,
iso8859_1_mbtowc, iso8859_1_wctomb
},
{ "ISO8859-2", NULLQUARK,
iso8859_2_mbtowc, iso8859_2_wctomb
},
{ "ISO8859-3", NULLQUARK,
iso8859_3_mbtowc, iso8859_3_wctomb
},
{ "ISO8859-4", NULLQUARK,
iso8859_4_mbtowc, iso8859_4_wctomb
},
{ "ISO8859-5", NULLQUARK,
iso8859_5_mbtowc, iso8859_5_wctomb
},
{ "ISO8859-6", NULLQUARK,
iso8859_6_mbtowc, iso8859_6_wctomb
},
{ "ISO8859-7", NULLQUARK,
iso8859_7_mbtowc, iso8859_7_wctomb
},
{ "ISO8859-8", NULLQUARK,
iso8859_8_mbtowc, iso8859_8_wctomb
},
{ "ISO8859-9", NULLQUARK,
iso8859_9_mbtowc, iso8859_9_wctomb
},
{ "ISO8859-10", NULLQUARK,
iso8859_10_mbtowc, iso8859_10_wctomb
},
{ "ISO8859-11", NULLQUARK,
iso8859_11_mbtowc, iso8859_11_wctomb
},
{ "ISO8859-13", NULLQUARK,
iso8859_13_mbtowc, iso8859_13_wctomb
},
{ "ISO8859-14", NULLQUARK,
iso8859_14_mbtowc, iso8859_14_wctomb
},
{ "ISO8859-15", NULLQUARK,
iso8859_15_mbtowc, iso8859_15_wctomb
},
{ "ISO8859-16", NULLQUARK,
iso8859_16_mbtowc, iso8859_16_wctomb
},
{ "JISX0201.1976-0", NULLQUARK,
jisx0201_mbtowc, jisx0201_wctomb
},
{ "TIS620-0", NULLQUARK,
tis620_mbtowc, tis620_wctomb
},
{ "GB2312.1980-0", NULLQUARK,
gb2312_mbtowc, gb2312_wctomb
},
/* Both JISX0208 names share the same conversion functions. */
{ "JISX0208.1983-0", NULLQUARK,
jisx0208_mbtowc, jisx0208_wctomb
},
{ "JISX0208.1990-0", NULLQUARK,
jisx0208_mbtowc, jisx0208_wctomb
},
{ "JISX0212.1990-0", NULLQUARK,
jisx0212_mbtowc, jisx0212_wctomb
},
{ "KSC5601.1987-0", NULLQUARK,
ksc5601_mbtowc, ksc5601_wctomb
},
{ "KOI8-R", NULLQUARK,
koi8_r_mbtowc, koi8_r_wctomb
},
{ "KOI8-U", NULLQUARK,
koi8_u_mbtowc, koi8_u_wctomb
},
{ "KOI8-C", NULLQUARK,
koi8_c_mbtowc, koi8_c_wctomb
},
{ "TATAR-CYR", NULLQUARK,
tatar_cyr_mbtowc, tatar_cyr_wctomb
},
{ "ARMSCII-8", NULLQUARK,
armscii_8_mbtowc, armscii_8_wctomb
},
{ "IBM-CP1133", NULLQUARK,
cp1133_mbtowc, cp1133_wctomb
},
{ "MULELAO-1", NULLQUARK,
mulelao_mbtowc, mulelao_wctomb
},
{ "VISCII1.1-1", NULLQUARK,
viscii_mbtowc, viscii_wctomb
},
{ "TCVN-5712", NULLQUARK,
tcvn_mbtowc, tcvn_wctomb
},
{ "GEORGIAN-ACADEMY", NULLQUARK,
georgian_academy_mbtowc, georgian_academy_wctomb
},
{ "GEORGIAN-PS", NULLQUARK,
georgian_ps_mbtowc, georgian_ps_wctomb
},
{ "ISO8859-9E", NULLQUARK,
iso8859_9e_mbtowc, iso8859_9e_wctomb
},
{ "MICROSOFT-CP1251", NULLQUARK,
cp1251_mbtowc, cp1251_wctomb
},
{ "MICROSOFT-CP1255", NULLQUARK,
cp1255_mbtowc, cp1255_wctomb
},
{ "MICROSOFT-CP1256", NULLQUARK,
cp1256_mbtowc, cp1256_wctomb
},
{ "BIG5-0", NULLQUARK,
big5_mbtowc, big5_wctomb
},
{ "BIG5-E0", NULLQUARK,
big5_0_mbtowc, big5_0_wctomb
},
{ "BIG5-E1", NULLQUARK,
big5_1_mbtowc, big5_1_wctomb
},
/*
 * Second ISO10646-1/UTF-8 entry.  Name lookups stop before it, but the
 * fallback scan in charset_wctocs() (entries 1 .. here) does reach it.
 */
{ "ISO10646-1", NULLQUARK,
utf8_mbtowc, utf8_wctomb
},
/*
 * ISO10646-1 through the ucs2be functions.  Only selected explicitly,
 * via ucs2_conv_index, by create_tofontcs_conv().
 */
{ "ISO10646-1", NULLQUARK,
ucs2be_mbtowc, ucs2be_wctomb
}
};
/* Total number of entries in all_charsets, including the trailing special entries. */
#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0]))
/* Number of entries excluding the final (ucs2be) one; search loops use all_charsets_count-1 as their bound. */
#define all_charsets_count (charsets_table_size - 1)
/* Index of the final entry, the ISO10646-1 entry using the ucs2be functions. */
#define ucs2_conv_index (charsets_table_size - 1)
/*
 * Fill in the xrm_name quark of every entry of all_charsets (all
 * charsets_table_size of them) from the entry's name.
 */
static void
init_all_charsets (void)
{
    int idx;
    const int total = charsets_table_size;

    for (idx = 0; idx < total; idx++) {
        Utf8Conv entry = &all_charsets[idx];

        entry->xrm_name = XrmStringToQuark(entry->name);
    }
}
/*
 * Run init_all_charsets() the first time it is needed.  The quark of
 * entry 0 doubles as the "already initialised" flag: it is NULLQUARK
 * only until init_all_charsets() has filled the table in.
 */
#define lazy_init_all_charsets() \
do { \
if (all_charsets[0].xrm_name == NULLQUARK) \
init_all_charsets(); \
} while (0)
/*
 * Convert text in one charset to UTF-8.
 *
 * args[0] is read as the XlcCharSet of the input; its encoding_name
 * selects the decoder from all_charsets (the last two table entries are
 * not searched).
 *
 * Returns 0 at once when from or *from is NULL.  Returns -1, leaving
 * from/to untouched, when no argument is given, the charset name is not
 * in the table, or the decoder reports RET_ILSEQ.  Otherwise updates
 * *from, *from_left, *to and *to_left to reflect what was consumed and
 * produced and returns the number of characters that were replaced by
 * BAD_WCHAR because UTF-8 could not encode them.
 */
static int
cstoutf8(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    const char *wanted;
    Utf8Conv entry;
    int remaining;
    unsigned char const *in;
    unsigned char const *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int replaced = 0;

    if (from == NULL || *from == NULL)
        return 0;
    if (num_args < 1)
        return -1;

    wanted = ((XlcCharSet) args[0])->encoding_name;

    /* Linear search by name; remaining reaches 0 only when nothing matched. */
    entry = all_charsets;
    remaining = all_charsets_count-1;
    while (remaining > 0 && strcmp(entry->name, wanted) != 0) {
        entry++;
        remaining--;
    }
    if (remaining == 0)
        return -1;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    while (in < in_end) {
        ucs4_t wc;
        int used;
        int made;

        used = entry->cstowc(conv, &wc, in, in_end-in);
        if (used == RET_ILSEQ)
            return -1;
        if (used == RET_TOOFEW(0))
            break;

        made = utf8_wctomb(NULL, out, wc, out_end-out);
        if (made == RET_ILSEQ) {
            /* Not encodable: emit the replacement character instead. */
            made = utf8_wctomb(NULL, out, BAD_WCHAR, out_end-out);
            if (made != RET_TOOSMALL)
                replaced++;
        }
        if (made == RET_TOOSMALL)
            break;

        in += used;
        out += made;
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return replaced;
}
/* Method table for the charset -> UTF-8 converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_cstoutf8 = {
close_converter,
cstoutf8,
NULL
};
/*
 * Opener for the charset -> UTF-8 converter.  Makes sure the charset
 * table quarks are initialised, then returns a record from
 * create_conv() (NULL on allocation failure).  from_type, to_lcd and
 * to_type are not used.
 */
static XlcConv
open_cstoutf8(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
lazy_init_all_charsets();
return create_conv(from_lcd, &methods_cstoutf8);
}
/*
 * Create a converter whose state is a NULL-terminated list of preferred
 * charset table entries, built from the charsets of the locale's
 * codesets.
 *
 * The list is stored in the same allocation, directly after the
 * XlcConvRec.  Each locale charset is looked up by encoding name in
 * all_charsets (excluding the last two entries); names already in the
 * list and names not found in the table are skipped.  Returns NULL on
 * allocation failure.
 */
static XlcConv
create_tocs_conv(
    XLCd lcd,
    XlcConvMethods methods)
{
    XlcConv conv;
    CodeSet *codesets;
    int ncodesets;
    int capacity;
    int filled;
    int cs, c, k;
    Utf8Conv *preferred;

    lazy_init_all_charsets();

    codesets = XLC_GENERIC(lcd, codeset_list);
    ncodesets = XLC_GENERIC(lcd, codeset_num);

    /* Upper bound on the list length: total charsets, capped by the table. */
    capacity = 0;
    for (cs = 0; cs < ncodesets; cs++)
        capacity += codesets[cs]->num_charsets;
    if (capacity > all_charsets_count-1)
        capacity = all_charsets_count-1;

    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec)
                             + (capacity + 1) * sizeof(Utf8Conv));
    if (conv == (XlcConv) NULL)
        return (XlcConv) NULL;
    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));

    filled = 0;
    for (cs = 0; cs < ncodesets; cs++) {
        XlcCharSet *members = codesets[cs]->charset_list;
        int nmembers = codesets[cs]->num_charsets;

        for (c = 0; c < nmembers; c++) {
            const char *name = members[c]->encoding_name;

            /* Skip names that are already on the list. */
            for (k = 0; k < filled; k++) {
                if (strcmp(preferred[k]->name, name) == 0)
                    break;
            }
            if (k < filled)
                continue;

            for (k = 0; k < all_charsets_count-1; k++) {
                if (strcmp(all_charsets[k].name, name) == 0) {
                    preferred[filled++] = &all_charsets[k];
                    break;
                }
            }
        }
    }
    preferred[filled] = (Utf8Conv) NULL;

    conv->methods = methods;
    conv->state = (XPointer) preferred;
    return conv;
}
/*
 * Close method for converters whose preferred-charset list lives in the
 * same allocation as the converter record (see create_tocs_conv()):
 * a single free releases both.
 */
static void
close_tocs_converter(
XlcConv conv)
{
Xfree((char *) conv);
}
/*
 * Encode one character, trying the preferred charsets first and then
 * falling back to the table entries from index 1 onward.
 *
 * preferred is a NULL-terminated list.  On success the chosen table
 * entry is stored in *charsetp, *sidep is set to XlcGL or XlcGR
 * depending on whether the first output byte is below 0x80, and the
 * number of bytes written to r is returned.  Returns RET_TOOSMALL as
 * soon as any encoder reports it, and RET_ILSEQ when no charset can
 * represent wc.
 */
static int
charset_wctocs(
    Utf8Conv *preferred,
    Utf8Conv *charsetp,
    XlcSide *sidep,
    XlcConv conv,
    unsigned char *r,
    ucs4_t wc,
    int n)
{
    Utf8Conv candidate;
    int produced;
    int remaining;

    while ((candidate = *preferred) != (Utf8Conv) NULL) {
        produced = candidate->wctocs(conv, r, wc, n);
        if (produced == RET_TOOSMALL)
            return RET_TOOSMALL;
        if (produced != RET_ILSEQ) {
            *charsetp = candidate;
            *sidep = (*r < 0x80 ? XlcGL : XlcGR);
            return produced;
        }
        preferred++;
    }

    /* Fallback: every remaining table entry except the first and the last. */
    candidate = all_charsets+1;
    for (remaining = all_charsets_count-1; remaining > 0; remaining--) {
        produced = candidate->wctocs(conv, r, wc, n);
        if (produced == RET_TOOSMALL)
            return RET_TOOSMALL;
        if (produced != RET_ILSEQ) {
            *charsetp = candidate;
            *sidep = (*r < 0x80 ? XlcGL : XlcGR);
            return produced;
        }
        candidate++;
    }
    return RET_ILSEQ;
}
/*
 * Convert a run of UTF-8 text into bytes of a single charset.
 *
 * The charset is fixed by the first convertible character; conversion
 * stops at the first later character that maps to a different charset
 * or an incompatible side.  Invalid UTF-8 bytes and characters no
 * charset can represent are skipped and counted.
 *
 * Returns 0 at once when from or *from is NULL, and -1 (leaving
 * from/to untouched) when no character could be assigned a charset.
 * Otherwise updates the from/to pointers and counts, stores the charset
 * through args[0] when num_args >= 1, and returns the number of skipped
 * characters.
 */
static int
utf8tocs(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    Utf8Conv *preferred;
    XlcCharSet run_charset = NULL;
    unsigned char const *in;
    unsigned char const *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int skipped = 0;

    if (from == NULL || *from == NULL)
        return 0;

    preferred = (Utf8Conv *) conv->state;
    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    while (in < in_end && out < out_end) {
        Utf8Conv hit = NULL;
        XlcSide hit_side = XlcNONE;
        ucs4_t wc;
        int used;
        int made;

        used = utf8_mbtowc(NULL, &wc, in, in_end-in);
        if (used == RET_TOOFEW(0))
            break;
        if (used == RET_ILSEQ) {
            /* Invalid byte: step over it alone. */
            in++;
            skipped++;
            continue;
        }

        made = charset_wctocs(preferred, &hit, &hit_side, conv, out, wc, out_end-out);
        if (made == RET_TOOSMALL)
            break;
        if (made == RET_ILSEQ) {
            in += used;
            skipped++;
            continue;
        }

        if (run_charset != NULL) {
            /* The run ends where the charset or side changes. */
            if (run_charset->xrm_encoding_name != hit->xrm_name)
                break;
            if (run_charset->side != XlcGLGR && run_charset->side != hit_side)
                break;
        } else {
            run_charset = _XlcGetCharSetWithSide(hit->name, hit_side);
            if (run_charset == NULL) {
                in += used;
                skipped++;
                continue;
            }
        }

        in += used;
        out += made;
    }

    if (run_charset == NULL)
        return -1;

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    if (num_args >= 1)
        *((XlcCharSet *)args[0]) = run_charset;
    return skipped;
}
/* Method table for the UTF-8 -> charset converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_utf8tocs = {
close_tocs_converter,
utf8tocs,
NULL
};
/*
 * Opener for the UTF-8 -> charset converter: builds it with
 * create_tocs_conv() from from_lcd.  The other arguments are unused.
 */
static XlcConv
open_utf8tocs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_tocs_conv(from_lcd, &methods_utf8tocs);
}
/*
 * Convert at most one UTF-8 character into bytes of a charset.
 *
 * Same procedure as utf8tocs(), except that the loop ends right after
 * the first character that is successfully converted.  Invalid bytes
 * and unrepresentable characters before it are skipped and counted.
 *
 * Returns 0 at once when from or *from is NULL, and -1 (leaving
 * from/to untouched) when no character could be assigned a charset.
 * Otherwise updates the from/to pointers and counts, stores the charset
 * through args[0] when num_args >= 1, and returns the number of skipped
 * characters.
 */
static int
utf8tocs1(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    Utf8Conv *preferred;
    XlcCharSet found_charset = NULL;
    unsigned char const *in;
    unsigned char const *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int skipped = 0;

    if (from == NULL || *from == NULL)
        return 0;

    preferred = (Utf8Conv *) conv->state;
    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    while (in < in_end && out < out_end) {
        Utf8Conv hit = NULL;
        XlcSide hit_side = XlcNONE;
        ucs4_t wc;
        int used;
        int made;

        used = utf8_mbtowc(NULL, &wc, in, in_end-in);
        if (used == RET_TOOFEW(0))
            break;
        if (used == RET_ILSEQ) {
            in++;
            skipped++;
            continue;
        }

        made = charset_wctocs(preferred, &hit, &hit_side, conv, out, wc, out_end-out);
        if (made == RET_TOOSMALL)
            break;
        if (made == RET_ILSEQ) {
            in += used;
            skipped++;
            continue;
        }

        if (found_charset != NULL) {
            if (found_charset->xrm_encoding_name != hit->xrm_name)
                break;
            if (found_charset->side != XlcGLGR && found_charset->side != hit_side)
                break;
        } else {
            found_charset = _XlcGetCharSetWithSide(hit->name, hit_side);
            if (found_charset == NULL) {
                in += used;
                skipped++;
                continue;
            }
        }

        /* One character converted: that is all this variant does. */
        in += used;
        out += made;
        break;
    }

    if (found_charset == NULL)
        return -1;

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    if (num_args >= 1)
        *((XlcCharSet *)args[0]) = found_charset;
    return skipped;
}
/* Method table for the single-character UTF-8 -> charset converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_utf8tocs1 = {
close_tocs_converter,
utf8tocs1,
NULL
};
/*
 * Opener for the single-character UTF-8 -> charset converter: builds it
 * with create_tocs_conv() from from_lcd.  The other arguments are unused.
 */
static XlcConv
open_utf8tocs1(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_tocs_conv(from_lcd, &methods_utf8tocs1);
}
/*
 * Convert UTF-8 to a string of single bytes.
 *
 * Characters with a code up to 0xff are written as that byte; invalid
 * UTF-8 bytes (consumed one at a time) and characters above 0xff are
 * written as BAD_CHAR and counted.  Stops when the input holds only an
 * incomplete character or the output is full.
 *
 * Returns 0 at once when from or *from is NULL; otherwise updates the
 * from/to pointers and counts and returns the number of substitutions.
 */
static int
utf8tostr(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    unsigned char const *in;
    unsigned char const *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int substituted = 0;

    if (from == NULL || *from == NULL)
        return 0;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    while (in < in_end) {
        unsigned char byte;
        ucs4_t wc;
        int used = utf8_mbtowc(NULL, &wc, in, in_end-in);

        if (used == RET_TOOFEW(0) || out == out_end)
            break;

        if (used == RET_ILSEQ) {
            used = 1;
            byte = BAD_CHAR;
            substituted++;
        } else if (wc > (ucs4_t) 0xff) {
            byte = BAD_CHAR;
            substituted++;
        } else {
            byte = (unsigned char) wc;
        }

        *out++ = byte;
        in += used;
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return substituted;
}
/* Method table for the UTF-8 -> string converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_utf8tostr = {
close_converter,
utf8tostr,
NULL
};
/*
 * Opener for the UTF-8 -> string converter: returns a record from
 * create_conv().  Only from_lcd is passed on; the other arguments are unused.
 */
static XlcConv
open_utf8tostr(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(from_lcd, &methods_utf8tostr);
}
/*
 * Convert a string of single bytes to UTF-8, treating each byte value
 * as the character code to encode.  Stops when the output has no room
 * for the next character.
 *
 * Returns 0 in every case; when from and *from are non-NULL the from/to
 * pointers and counts are updated.
 */
static int
strtoutf8(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    unsigned char const *in;
    unsigned char const *in_end;
    unsigned char *out;
    unsigned char *out_end;

    if (from == NULL || *from == NULL)
        return 0;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    for (; in < in_end; in++) {
        int made = utf8_wctomb(NULL, out, *in, out_end-out);

        if (made == RET_TOOSMALL)
            break;
        out += made;
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return 0;
}
/* Method table for the string -> UTF-8 converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_strtoutf8 = {
close_converter,
strtoutf8,
NULL
};
/*
 * Opener for the string -> UTF-8 converter: returns a record from
 * create_conv().  Only from_lcd is passed on; the other arguments are unused.
 */
static XlcConv
open_strtoutf8(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(from_lcd, &methods_strtoutf8);
}
XPointer
_Utf8GetConvByName(
const char *name)
{
XrmQuark xrm_name;
Utf8Conv convptr;
int i;
if (name == NULL)
return (XPointer) NULL;
lazy_init_all_charsets();
xrm_name = XrmStringToQuark(name);
for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
if (convptr->xrm_name == xrm_name)
return (XPointer) convptr->wctocs;
return (XPointer) NULL;
}
/*
 * Create the converter used for UCS character -> charset conversion.
 *
 * When the locale's codeset compares equal to "UTF-8" the preferred
 * list holds just the first table entry (ISO10646-1 as UTF-8);
 * otherwise the work is delegated to create_tocs_conv().  Returns NULL
 * on allocation failure.
 */
static XlcConv
create_ucstocs_conv(
    XLCd lcd,
    XlcConvMethods methods)
{
    XlcConv conv;
    Utf8Conv *preferred;

    /* Anything other than a UTF-8 codeset takes the generic path. */
    if (!XLC_PUBLIC_PART(lcd)->codeset
        || _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") != 0)
        return create_tocs_conv(lcd, methods);

    lazy_init_all_charsets();

    /* Room for the record plus a one-entry, NULL-terminated list. */
    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv));
    if (conv == (XlcConv) NULL)
        return (XlcConv) NULL;

    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
    preferred[0] = &all_charsets[0];
    preferred[1] = (Utf8Conv) NULL;

    conv->methods = methods;
    conv->state = (XPointer) preferred;
    return conv;
}
/*
 * Encode one character using only the preferred charsets; unlike
 * charset_wctocs() there is no fallback to the rest of the table.
 *
 * On success stores the chosen entry in *charsetp, sets *sidep to XlcGL
 * or XlcGR depending on whether the first output byte is below 0x80,
 * and returns the number of bytes written.  Returns RET_TOOSMALL as
 * soon as an encoder reports it, and RET_ILSEQ when none of the
 * preferred charsets can represent wc.
 */
static int
charset_wctocs_exactly(
    Utf8Conv *preferred,
    Utf8Conv *charsetp,
    XlcSide *sidep,
    XlcConv conv,
    unsigned char *r,
    ucs4_t wc,
    int n)
{
    Utf8Conv candidate;

    while ((candidate = *preferred) != (Utf8Conv) NULL) {
        int produced = candidate->wctocs(conv, r, wc, n);

        if (produced == RET_TOOSMALL)
            return RET_TOOSMALL;
        if (produced != RET_ILSEQ) {
            *charsetp = candidate;
            *sidep = (*r < 0x80 ? XlcGL : XlcGR);
            return produced;
        }
        preferred++;
    }
    return RET_ILSEQ;
}
/*
 * Convert exactly one UCS character (read as a ucs4_t from *from) into
 * bytes of one of the converter's preferred charsets.
 *
 * Returns 0 at once when from or *from is NULL.  Returns -1, leaving
 * all arguments untouched, when the character cannot be encoded by any
 * preferred charset or the charset lookup fails.  On success advances
 * *from by one character, decrements *from_left, reduces *to_left by
 * the number of bytes written, stores the charset through args[0] when
 * num_args >= 1, and returns 0.
 *
 * NOTE(review): *to is stored back without being advanced past the
 * bytes written, although *to_left is reduced.  This is kept as is
 * because callers may rely on it -- confirm before changing.
 */
static int
ucstocs1(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    ucs4_t const *src;
    unsigned char *dst;
    int unconv_num = 0;
    Utf8Conv *preferred_charsets;
    Utf8Conv chosen_charset = NULL;
    XlcSide chosen_side = XlcNONE;
    XlcCharSet charset = NULL;
    int count;

    /*
     * Validate before touching any argument: the pointers must not be
     * dereferenced in the declarations above, or this guard would come
     * too late to protect against a NULL from.
     */
    if (from == NULL || *from == NULL)
        return 0;

    src = (ucs4_t const *) *from;
    dst = (unsigned char *) *to;
    preferred_charsets = (Utf8Conv *) conv->state;

    count = charset_wctocs_exactly(preferred_charsets, &chosen_charset,
                                   &chosen_side, conv, dst, *src, *to_left);
    if (count < 1) {
        /* RET_ILSEQ or RET_TOOSMALL: nothing was written. */
        unconv_num++;
        count = 0;
    } else {
        charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
    }
    if (charset == NULL)
        return -1;

    *from = (XPointer) ++src;
    (*from_left)--;
    *to = (XPointer) dst;
    *to_left -= count;
    if (num_args >= 1)
        *((XlcCharSet *)args[0]) = charset;
    return unconv_num;
}
/* Method table for the single UCS character -> charset converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_ucstocs1 = {
close_tocs_converter,
ucstocs1,
NULL
};
/*
 * Opener for the single UCS character -> charset converter: builds it
 * with create_ucstocs_conv() from from_lcd.  The other arguments are unused.
 */
static XlcConv
open_ucstocs1(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_ucstocs_conv(from_lcd, &methods_ucstocs1);
}
/*
 * Convert an array of ucs4_t characters to UTF-8.
 *
 * *from_left counts characters, *to_left counts bytes.  A character
 * that UTF-8 cannot encode produces no output and is counted; the
 * conversion stops when the output has no room for the next character.
 *
 * Returns 0 at once when from or *from is NULL; otherwise updates the
 * from/to pointers and counts and returns the number of characters
 * that could not be encoded.
 */
static int
ucstoutf8(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    const ucs4_t *in;
    const ucs4_t *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int dropped = 0;

    if (from == NULL || *from == NULL)
        return 0;

    in = (const ucs4_t *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    for (; in < in_end; in++) {
        int made = utf8_wctomb(NULL, out, *in, out_end-out);

        if (made == RET_TOOSMALL)
            break;
        if (made == RET_ILSEQ)
            dropped++;
        out += made;
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return dropped;
}
/* Method table for the UCS character -> UTF-8 converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_ucstoutf8 = {
close_converter,
ucstoutf8,
NULL
};
/*
 * Opener for the UCS character -> UTF-8 converter: returns a record
 * from create_conv().  Only from_lcd is passed on; the other arguments are unused.
 */
static XlcConv
open_ucstoutf8(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(from_lcd, &methods_ucstoutf8);
}
/*
 * Register on lcd the converters that involve the XlcNUtf8String type
 * and the XlcNUcsChar type.  Each call pairs a (from type, to type)
 * combination with the opener that creates the matching converter.
 */
void
_XlcAddUtf8Converters(
XLCd lcd)
{
/* charset <-> UTF-8 string */
_XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8);
_XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs);
_XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1);
/* string <-> UTF-8 string */
_XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8);
_XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr);
/* UCS character -> single character / UTF-8 string */
_XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNChar, open_ucstocs1);
_XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNUtf8String, open_ucstoutf8);
}
/*
 * Convert UTF-8 to wide characters.
 *
 * *from_left counts bytes, *to_left counts wchar_t elements.  An
 * invalid byte is consumed on its own, stored as BAD_WCHAR and counted.
 * Stops when the input holds only an incomplete character or the
 * output is full.
 *
 * Returns 0 at once when from or *from is NULL; otherwise updates the
 * from/to pointers and counts and returns the number of substitutions.
 */
static int
utf8towcs(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    unsigned char const *in;
    unsigned char const *in_end;
    wchar_t *out;
    wchar_t *out_end;
    int substituted = 0;

    if (from == NULL || *from == NULL)
        return 0;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (wchar_t *) *to;
    out_end = out + *to_left;

    while (in < in_end && out < out_end) {
        ucs4_t wc;
        int used = utf8_mbtowc(NULL, &wc, in, in_end-in);

        if (used == RET_TOOFEW(0))
            break;

        if (used != RET_ILSEQ) {
            in += used;
            *out++ = wc;
            continue;
        }

        /* Invalid byte: skip it and leave a replacement character. */
        in++;
        *out++ = BAD_WCHAR;
        substituted++;
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return substituted;
}
/* Method table for the UTF-8 -> wide character converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_utf8towcs = {
close_converter,
utf8towcs,
NULL
};
/*
 * Opener for the UTF-8 -> wide character converter: returns a record
 * from create_conv().  Only from_lcd is passed on; the other arguments are unused.
 */
static XlcConv
open_utf8towcs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(from_lcd, &methods_utf8towcs);
}
/*
 * Convert wide characters to UTF-8.
 *
 * *from_left counts wchar_t elements, *to_left counts bytes.  A
 * character UTF-8 cannot encode is replaced by BAD_WCHAR and counted.
 * Stops when the output has no room for the next character.
 *
 * Returns 0 at once when from or *from is NULL; otherwise updates the
 * from/to pointers and counts and returns the number of replacements.
 */
static int
wcstoutf8(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    wchar_t const *in;
    wchar_t const *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int replaced = 0;

    if (from == NULL || *from == NULL)
        return 0;

    in = (wchar_t const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    for (; in < in_end; in++) {
        int made = utf8_wctomb(NULL, out, *in, out_end-out);

        if (made == RET_ILSEQ) {
            /* Not encodable: emit the replacement character instead. */
            made = utf8_wctomb(NULL, out, BAD_WCHAR, out_end-out);
            if (made != RET_TOOSMALL)
                replaced++;
        }
        if (made == RET_TOOSMALL)
            break;
        out += made;
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return replaced;
}
/* Method table for the wide character -> UTF-8 converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_wcstoutf8 = {
close_converter,
wcstoutf8,
NULL
};
/*
 * Opener for the wide character -> UTF-8 converter: returns a record
 * from create_conv().  Only from_lcd is passed on; the other arguments are unused.
 */
static XlcConv
open_wcstoutf8(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(from_lcd, &methods_wcstoutf8);
}
/*
 * Convert a string of single bytes to wide characters by widening each
 * byte value, until either the input or the output is exhausted.
 *
 * Returns 0 in every case; when from and *from are non-NULL the from/to
 * pointers and counts are updated.
 */
static int
our_strtowcs(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    unsigned char const *in;
    unsigned char const *in_end;
    wchar_t *out;
    wchar_t *out_end;

    if (from == NULL || *from == NULL)
        return 0;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (wchar_t *) *to;
    out_end = out + *to_left;

    for (; in < in_end && out < out_end; in++, out++)
        *out = (wchar_t) *in;

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return 0;
}
/* Method table for the string -> wide character converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_strtowcs = {
close_converter,
our_strtowcs,
NULL
};
/*
 * Opener for the string -> wide character converter: returns a record
 * from create_conv().  Only from_lcd is passed on; the other arguments are unused.
 */
static XlcConv
open_strtowcs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(from_lcd, &methods_strtowcs);
}
/*
 * Convert wide characters to a string of single bytes.
 *
 * Characters below 0x80 are copied as a byte; every other character is
 * written as BAD_CHAR and counted.  Runs until either the input or the
 * output is exhausted.
 *
 * Returns 0 at once when from or *from is NULL; otherwise updates the
 * from/to pointers and counts and returns the number of substitutions.
 */
static int
our_wcstostr(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    wchar_t const *in;
    wchar_t const *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int substituted = 0;

    if (from == NULL || *from == NULL)
        return 0;

    in = (wchar_t const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    for (; in < in_end && out < out_end; in++, out++) {
        unsigned int wc = *in;

        if (wc >= 0x80) {
            *out = BAD_CHAR;
            substituted++;
        } else {
            *out = wc;
        }
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return substituted;
}
/* Method table for the wide character -> string converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_wcstostr = {
close_converter,
our_wcstostr,
NULL
};
/*
 * Opener for the wide character -> string converter: returns a record
 * from create_conv().  Only from_lcd is passed on; the other arguments are unused.
 */
static XlcConv
open_wcstostr(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(from_lcd, &methods_wcstostr);
}
/*
 * Convert text in one charset to wide characters.
 *
 * args[0] is read as the XlcCharSet of the input; its encoding_name
 * selects the decoder from all_charsets (the last two table entries are
 * not searched).  *from_left counts bytes, *to_left counts wchar_t
 * elements.
 *
 * Returns 0 at once when from or *from is NULL.  Returns -1, leaving
 * from/to untouched, when no argument is given, the charset name is not
 * in the table, or the decoder reports RET_ILSEQ.  Otherwise updates
 * the from/to pointers and counts and returns 0.
 */
static int
cstowcs(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    const char *wanted;
    Utf8Conv entry;
    int remaining;
    unsigned char const *in;
    unsigned char const *in_end;
    wchar_t *out;
    wchar_t *out_end;
    int unconverted = 0;

    if (from == NULL || *from == NULL)
        return 0;
    if (num_args < 1)
        return -1;

    wanted = ((XlcCharSet) args[0])->encoding_name;

    /* Linear search by name; remaining reaches 0 only when nothing matched. */
    entry = all_charsets;
    remaining = all_charsets_count-1;
    while (remaining > 0 && strcmp(entry->name, wanted) != 0) {
        entry++;
        remaining--;
    }
    if (remaining == 0)
        return -1;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (wchar_t *) *to;
    out_end = out + *to_left;

    while (in < in_end && out < out_end) {
        unsigned int wc;
        int used = entry->cstowc(conv, &wc, in, in_end-in);

        if (used == RET_ILSEQ)
            return -1;
        if (used == RET_TOOFEW(0))
            break;

        *out++ = wc;
        in += used;
    }

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return unconverted;
}
/* Method table for the charset -> wide character converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_cstowcs = {
close_converter,
cstowcs,
NULL
};
/*
 * Opener for the charset -> wide character converter.  Makes sure the
 * charset table quarks are initialised, then returns a record from
 * create_conv().  from_type, to_lcd and to_type are not used.
 */
static XlcConv
open_cstowcs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
lazy_init_all_charsets();
return create_conv(from_lcd, &methods_cstowcs);
}
/*
 * Convert a run of wide characters into bytes of a single charset.
 *
 * The charset is fixed by the first convertible character; conversion
 * stops at the first later character that maps to a different charset
 * or an incompatible side.  Characters that no charset can represent
 * are skipped and counted.
 *
 * Returns 0 at once when from or *from is NULL, and -1 (leaving
 * from/to untouched) when no character could be assigned a charset.
 * Otherwise updates the from/to pointers and counts, stores the charset
 * through args[0] when num_args >= 1, and returns the number of skipped
 * characters.
 */
static int
wcstocs(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    Utf8Conv *preferred;
    XlcCharSet run_charset = NULL;
    wchar_t const *in;
    wchar_t const *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int skipped = 0;

    if (from == NULL || *from == NULL)
        return 0;

    preferred = (Utf8Conv *) conv->state;
    in = (wchar_t const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    while (in < in_end && out < out_end) {
        Utf8Conv hit = NULL;
        XlcSide hit_side = XlcNONE;
        wchar_t wc = *in;
        int made;

        made = charset_wctocs(preferred, &hit, &hit_side, conv, out, wc, out_end-out);
        if (made == RET_TOOSMALL)
            break;
        if (made == RET_ILSEQ) {
            in++;
            skipped++;
            continue;
        }

        if (run_charset != NULL) {
            /* The run ends where the charset or side changes. */
            if (run_charset->xrm_encoding_name != hit->xrm_name)
                break;
            if (run_charset->side != XlcGLGR && run_charset->side != hit_side)
                break;
        } else {
            run_charset = _XlcGetCharSetWithSide(hit->name, hit_side);
            if (run_charset == NULL) {
                in++;
                skipped++;
                continue;
            }
        }

        in++;
        out += made;
    }

    if (run_charset == NULL)
        return -1;

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    if (num_args >= 1)
        *((XlcCharSet *)args[0]) = run_charset;
    return skipped;
}
/* Method table for the wide character -> charset converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_wcstocs = {
close_tocs_converter,
wcstocs,
NULL
};
/*
 * Opener for the wide character -> charset converter: builds it with
 * create_tocs_conv() from from_lcd.  The other arguments are unused.
 */
static XlcConv
open_wcstocs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_tocs_conv(from_lcd, &methods_wcstocs);
}
/*
 * Convert at most one wide character into bytes of a charset.
 *
 * Same procedure as wcstocs(), except that the loop ends right after
 * the first character that is successfully converted.  Characters that
 * cannot be represented before it are skipped and counted.
 *
 * Returns 0 at once when from or *from is NULL, and -1 (leaving
 * from/to untouched) when no character could be assigned a charset.
 * Otherwise updates the from/to pointers and counts, stores the charset
 * through args[0] when num_args >= 1, and returns the number of skipped
 * characters.
 */
static int
wcstocs1(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    Utf8Conv *preferred;
    XlcCharSet found_charset = NULL;
    wchar_t const *in;
    wchar_t const *in_end;
    unsigned char *out;
    unsigned char *out_end;
    int skipped = 0;

    if (from == NULL || *from == NULL)
        return 0;

    preferred = (Utf8Conv *) conv->state;
    in = (wchar_t const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    while (in < in_end && out < out_end) {
        Utf8Conv hit = NULL;
        XlcSide hit_side = XlcNONE;
        wchar_t wc = *in;
        int made;

        made = charset_wctocs(preferred, &hit, &hit_side, conv, out, wc, out_end-out);
        if (made == RET_TOOSMALL)
            break;
        if (made == RET_ILSEQ) {
            in++;
            skipped++;
            continue;
        }

        if (found_charset != NULL) {
            if (found_charset->xrm_encoding_name != hit->xrm_name)
                break;
            if (found_charset->side != XlcGLGR && found_charset->side != hit_side)
                break;
        } else {
            found_charset = _XlcGetCharSetWithSide(hit->name, hit_side);
            if (found_charset == NULL) {
                in++;
                skipped++;
                continue;
            }
        }

        /* One character converted: that is all this variant does. */
        in++;
        out += made;
        break;
    }

    if (found_charset == NULL)
        return -1;

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    if (num_args >= 1)
        *((XlcCharSet *)args[0]) = found_charset;
    return skipped;
}
/* Method table for the single wide character -> charset converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_wcstocs1 = {
close_tocs_converter,
wcstocs1,
NULL
};
/*
 * Opener for the single wide character -> charset converter: builds it
 * with create_tocs_conv() from from_lcd.  The other arguments are unused.
 */
static XlcConv
open_wcstocs1(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_tocs_conv(from_lcd, &methods_wcstocs1);
}
/*
 * Copy bytes unchanged from input to output, one at a time, until
 * either side is exhausted.
 *
 * Returns 0 in every case; when from and *from are non-NULL the from/to
 * pointers and counts are updated.
 */
static int
identity(
    XlcConv conv,
    XPointer *from,
    int *from_left,
    XPointer *to,
    int *to_left,
    XPointer *args,
    int num_args)
{
    unsigned char const *in;
    unsigned char const *in_end;
    unsigned char *out;
    unsigned char *out_end;

    if (from == NULL || *from == NULL)
        return 0;

    in = (unsigned char const *) *from;
    in_end = in + *from_left;
    out = (unsigned char *) *to;
    out_end = out + *to_left;

    for (; in < in_end && out < out_end; in++, out++)
        *out = *in;

    *from = (XPointer) in;
    *from_left = in_end - in;
    *to = (XPointer) out;
    *to_left = out_end - out;
    return 0;
}
/* Method table for the byte-copy converter; the third slot is left NULL. */
static XlcConvMethodsRec methods_identity = {
close_converter,
identity,
NULL
};
/*
 * Opener for the byte-copy converter: returns a record from
 * create_conv().  Only from_lcd is passed on; the other arguments are unused.
 */
static XlcConv
open_identity(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_conv(from_lcd, &methods_identity);
}
/*
 * Create a converter whose preferred-charset list comes from the
 * locale's XLC_FONTSET resources rather than from its codesets.
 *
 * For i = 0, 1, ... the resource "fs<i>.charset.name" is read, falling
 * back to "fs<i>.charset"; the scan ends at the first index for which
 * neither yields a value.  A first pass counts the values to size the
 * allocation, a second pass fills the list, which is stored in the same
 * allocation directly after the XlcConvRec and is NULL-terminated.
 *
 * Each value is resolved with _XlcGetCharSet(); unknown charsets, names
 * already on the list and names missing from all_charsets are skipped.
 * "ISO10646-1" is mapped to the table entry at ucs2_conv_index instead
 * of the entry found by name.
 *
 * Returns NULL on allocation failure.
 */
static XlcConv
create_tofontcs_conv(
    XLCd lcd,
    XlcConvMethods methods)
{
    XlcConv conv;
    int i, num, k, count;
    /*
     * Large enough for "fs" + any int + ".charset.name" + NUL
     * (2 + 11 + 13 + 1 = 27 bytes); the resource names are formatted
     * with snprintf so a large index can never overrun the buffer.
     */
    char **value, buf[32];
    Utf8Conv *preferred;

    lazy_init_all_charsets();

    /* Pass 1: count the charset values to size the preferred list. */
    for (i = 0, num = 0;; i++) {
        snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
        _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
        if (count < 1) {
            snprintf(buf, sizeof(buf), "fs%d.charset", i);
            _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
            if (count < 1)
                break;
        }
        num += count;
    }

    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv));
    if (conv == (XlcConv) NULL)
        return (XlcConv) NULL;
    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));

    /* Pass 2: resolve each value and append its table entry. */
    for (i = 0, num = 0;; i++) {
        snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
        _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
        if (count < 1) {
            snprintf(buf, sizeof(buf), "fs%d.charset", i);
            _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
            if (count < 1)
                break;
        }
        while (count-- > 0) {
            XlcCharSet charset = _XlcGetCharSet(*value++);
            const char *name;

            if (charset == (XlcCharSet) NULL)
                continue;

            name = charset->encoding_name;
            /* Skip names that are already on the list. */
            for (k = num - 1; k >= 0; k--)
                if (!strcmp(preferred[k]->name, name))
                    break;
            if (k < 0) {
                /* For this list ISO10646-1 means the ucs2be entry. */
                if (!strcmp("ISO10646-1", name)) {
                    preferred[num++] = &all_charsets[ucs2_conv_index];
                    continue;
                }
                for (k = 0; k < all_charsets_count-1; k++)
                    if (!strcmp(all_charsets[k].name, name)) {
                        preferred[num++] = &all_charsets[k];
                        break;
                    }
            }
        }
    }
    preferred[num] = (Utf8Conv) NULL;

    conv->methods = methods;
    conv->state = (XPointer) preferred;
    return conv;
}
/*
 * Opener for the wide character -> font charset converter: uses the
 * wcstocs method table with a preferred list built by
 * create_tofontcs_conv() from from_lcd.  The other arguments are unused.
 */
static XlcConv
open_wcstofcs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_tofontcs_conv(from_lcd, &methods_wcstocs);
}
/*
 * Opener for the UTF-8 -> font charset converter: uses the utf8tocs
 * method table with a preferred list built by create_tofontcs_conv()
 * from from_lcd.  The other arguments are unused.
 */
static XlcConv
open_utf8tofcs(
XLCd from_lcd,
const char *from_type,
XLCd to_lcd,
const char *to_type)
{
return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
}
/*
 * Register on lcd the full set of converters in which the multibyte
 * type is handled as UTF-8.  Each call pairs a (from type, to type)
 * combination with the opener that creates the matching converter.
 */
void
_XlcAddUtf8LocaleConverters(
XLCd lcd)
{
/* multibyte <-> wide character */
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs);
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8);
/* wide character <-> string */
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
_XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
/* charset <-> multibyte */
_XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8);
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs);
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1);
/* charset <-> wide character */
_XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
/* string <-> multibyte */
_XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8);
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr);
/* UTF-8 string <-> multibyte: a plain byte copy in both directions */
_XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity);
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity);
/* multibyte / wide character -> font charset */
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
}