#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <errno.h>
#ifndef errno
extern int errno;
#endif
#if HAVE_ICONV
# include <iconv.h>
#endif
#ifndef EILSEQ
# define EILSEQ ENOENT
#endif
#ifndef ENOTSUP
# define ENOTSUP EINVAL
#endif
#if HAVE_LANGINFO_CODESET && ! USE_INCLUDED_LIBINTL
# include <langinfo.h>
#endif
#include "unicodeio.h"
/* Store the UTF-8 representation of WC in R and return its length in
   bytes (1..6).  Values above 0x7fffffff cannot be represented; in that
   case nothing is written and -1 is returned.  R must have room for the
   returned number of bytes (at most 6).  */
static int
utf8_wctomb (unsigned char *r, unsigned int wc)
{
  /* Marker bits of the first byte, indexed by sequence length.  */
  static const unsigned char lead_mark[7] =
    { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
  int count;
  int pos;

  if (wc > 0x7fffffff)
    return -1;

  if (wc < 0x80)
    count = 1;
  else if (wc < 0x800)
    count = 2;
  else if (wc < 0x10000)
    count = 3;
  else if (wc < 0x200000)
    count = 4;
  else if (wc < 0x4000000)
    count = 5;
  else
    count = 6;

  /* Emit the continuation bytes from last to first; each one carries
     the next six low-order bits of the value.  */
  for (pos = count - 1; pos > 0; pos--)
    {
      r[pos] = 0x80 | (wc & 0x3f);
      wc >>= 6;
    }

  /* What remains of WC fits below the marker bits of the first byte.  */
  r[0] = lead_mark[count] | wc;
  return count;
}
/* Encoding name compared against the locale charset and used as the
   source encoding for iconv_open.  */
#define UTF8_NAME "UTF-8"
/* Converts the Unicode character CODE to its multibyte representation
   in the locale's character set and hands the result to a callback.

   CODE is first encoded as UTF-8.  If the locale charset is "UTF-8"
   those bytes are passed on directly; otherwise, when iconv is
   available, they are converted with iconv first.

   On success, SUCCESS (buf, buflen, CALLBACK_ARG) is called with the
   converted bytes; on any failure, FAILURE (CODE, CALLBACK_ARG) is
   called instead.  The return value is whatever the invoked callback
   returns.

   errno as seen by FAILURE: EILSEQ when CODE cannot be encoded as
   UTF-8 or when iconv did not signal an error but left input
   unconsumed (or hit the SGI case below); ENOTSUP when the locale is
   not UTF-8 and iconv support is compiled out; otherwise whatever the
   failing iconv_open/iconv call left there.

   The conversion setup is cached in function-local statics.  */
int
unicode_to_mb (unsigned int code,
int (*success) PARAMS((const char *buf, size_t buflen,
void *callback_arg)),
int (*failure) PARAMS((unsigned int code,
void *callback_arg)),
void *callback_arg)
{
/* One-time setup state, kept across calls.  */
static int initialized;
static int is_utf8;
#if HAVE_ICONV
static iconv_t utf8_to_local;
#endif
/* Holds the UTF-8 form of CODE; utf8_wctomb writes at most 6 bytes.  */
char inbuf[6];
int count;
if (!initialized)
{
/* Determine the locale's charset name.  */
const char *charset;
#if USE_INCLUDED_LIBINTL
extern const char *locale_charset PARAMS ((void));
charset = locale_charset ();
#else
# if HAVE_LANGINFO_CODESET
charset = nl_langinfo (CODESET);
# else
/* No way to query the charset: use the empty string.  */
charset = "";
# endif
#endif
/* Exact, case-sensitive match against "UTF-8".  */
is_utf8 = !strcmp (charset, UTF8_NAME);
#if HAVE_ICONV
if (!is_utf8)
{
utf8_to_local = iconv_open (charset, UTF8_NAME);
if (utf8_to_local == (iconv_t)(-1))
{
/* The locale charset is not supported as a target: fall back to
   ASCII.  */
utf8_to_local = iconv_open ("ASCII", UTF8_NAME);
if (utf8_to_local == (iconv_t)(-1))
/* NOTE(review): this return happens before `initialized' is set, so
   the next call repeats the charset lookup and both iconv_open
   attempts.  */
return failure (code, callback_arg);
}
}
#endif
initialized = 1;
}
/* Encode CODE as UTF-8; a negative count means CODE is out of range.  */
count = utf8_wctomb ((unsigned char *) inbuf, code);
if (count < 0)
{
errno = EILSEQ;
return failure (code, callback_arg);
}
if (is_utf8)
{
/* The locale is UTF-8: no conversion needed.  */
return success (inbuf, count, callback_arg);
}
else
{
#if HAVE_ICONV
/* Receives the converted character plus any bytes emitted by the
   state-reset call below.  */
char outbuf[25];
const char *inptr;
size_t inbytesleft;
char *outptr;
size_t outbytesleft;
size_t res;
inptr = inbuf;
inbytesleft = count;
outptr = outbuf;
outbytesleft = sizeof (outbuf);
/* Convert the single character from UTF-8 to the local charset.  */
res = iconv (utf8_to_local,
(ICONV_CONST char **)&inptr, &inbytesleft,
&outptr, &outbytesleft);
/* Failure if iconv reported an error or did not consume all input.
   On SGI without libiconv, a result of exactly one NUL byte for a
   non-zero CODE with res > 0 is also treated as failure -- presumably
   how that iconv marks an unconvertible character; confirm before
   changing.  */
if (inbytesleft > 0 || res == (size_t)(-1)
# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
|| (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0')
# endif
)
{
/* iconv did not set errno in this case, so supply one.  */
if (res != (size_t)(-1))
errno = EILSEQ;
return failure (code, callback_arg);
}
/* Second iconv call with a NULL input, appending any output it
   produces to outbuf.  It is compiled out on glibc 2.0/2.1 and on
   __sun unless libiconv is in use -- presumably to avoid problems in
   those implementations; confirm before changing.  */
# if defined _LIBICONV_VERSION \
|| !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft);
if (res == (size_t)(-1))
return failure (code, callback_arg);
# endif
/* outptr - outbuf is the number of bytes iconv produced.  */
return success (outbuf, outptr - outbuf, callback_arg);
#else
/* Non-UTF-8 locale and no iconv: conversion is not possible.  */
errno = ENOTSUP;
return failure (code, callback_arg);
#endif
}
}
/* Success callback for unicode_to_mb: writes the BUFLEN bytes at BUF
   to the stdio stream passed as CALLBACK_ARG (a FILE *).
   Returns 0 if all BUFLEN bytes were written, -1 otherwise.  */
int
print_unicode_success (const char *buf, size_t buflen, void *callback_arg)
{
  FILE *stream = (FILE *) callback_arg;

  /* Compare against BUFLEN rather than 0: a short write must count as
     a failure, and writing zero bytes is not an error.  */
  return fwrite (buf, 1, buflen, stream) == buflen ? 0 : -1;
}
/* Failure callback for unicode_to_mb: prints CODE to the stdio stream
   passed as CALLBACK_ARG (a FILE *) as a backslash escape -- \uXXXX
   for values below 0x10000, \UXXXXXXXX otherwise.  The value errno had
   on entry is restored before returning.  Always returns -1.  */
int
print_unicode_failure (unsigned int code, void *callback_arg)
{
  FILE *out = (FILE *) callback_arg;
  const int saved_errno = errno;

  if (code < 0x10000)
    fprintf (out, "\\u%04X", code);
  else
    fprintf (out, "\\U%08X", code);

  /* fprintf may have changed errno; restore the value seen on entry.  */
  errno = saved_errno;
  return -1;
}
/* Prints the Unicode character CODE to STREAM by passing it to
   unicode_to_mb with print_unicode_success and print_unicode_failure
   as callbacks and STREAM as their argument.  Returns the value
   unicode_to_mb returns.  */
int
print_unicode_char (FILE *stream, unsigned int code)
{
  int status;

  status = unicode_to_mb (code,
                          print_unicode_success,
                          print_unicode_failure,
                          stream);
  return status;
}