Derive a UTF2 codec (multibyte sequences of at most 3 bytes) from the
FreeBSD utf8.c named in the __FBSDID line below.

Summary of the changes in this patch:
  * The _UTF8_* functions and the _UTF8State type are renamed to _UTF2_*;
    the conversion functions become static and take a trailing locale_t.
  * _UTF2_init() stores the function pointers and __mb_cur_max in the
    struct __xlocale_st_runelocale it is given instead of in globals.
  * The 4-, 5- and 6-byte sequences are removed from mbrtowc and wcrtomb,
    and the state sanity check accepts "want" values up to 3 only.
  * In _UTF2_wcsnrtombs, nb is a size_t, so the original test
    "(nb = (int)...) < 0" can never be true.  The added lines compare
    against (size_t)-1 instead, so a failed _UTF2_wcrtomb is reported.

--- utf2.c.orig	Fri Feb 18 15:49:55 2005
+++ utf2.c	Fri Feb 18 15:52:07 2005
@@ -25,8 +25,11 @@
  */
 
 #include <sys/param.h>
+/* dumb down UTF-8 to do UTF2 */
 __FBSDID("$FreeBSD: src/lib/libc/locale/utf8.c,v 1.11 2004/07/27 06:29:48 tjr Exp $");
 
+#include "xlocale_private.h"
+
 #include <errno.h>
 #include <limits.h>
 #include <runetype.h>
@@ -35,54 +38,55 @@
 #include <wchar.h>
 #include "mblocal.h"
 
-size_t	_UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
-	    mbstate_t * __restrict);
-int	_UTF8_mbsinit(const mbstate_t *);
-size_t	_UTF8_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict,
-	    size_t, size_t, mbstate_t * __restrict);
-size_t	_UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
-size_t	_UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
-	    size_t, size_t, mbstate_t * __restrict);
+#define UTF2_MB_CUR_MAX		3
+
+static size_t	_UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
+	    mbstate_t * __restrict, locale_t);
+static int	_UTF2_mbsinit(const mbstate_t *, locale_t);
+static size_t	_UTF2_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict,
+	    size_t, size_t, mbstate_t * __restrict, locale_t);
+static size_t	_UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict, locale_t);
+static size_t	_UTF2_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
+	    size_t, size_t, mbstate_t * __restrict, locale_t);
 
 typedef struct {
 	wchar_t	ch;
 	int	want;
 	wchar_t	lbound;
-} _UTF8State;
+} _UTF2State;
 
-int
-_UTF8_init(_RuneLocale *rl)
+__private_extern__ int
+_UTF2_init(struct __xlocale_st_runelocale *xrl)
 {
 
-	__mbrtowc = _UTF8_mbrtowc;
-	__wcrtomb = _UTF8_wcrtomb;
-	__mbsinit = _UTF8_mbsinit;
-	__mbsnrtowcs = _UTF8_mbsnrtowcs;
-	__wcsnrtombs = _UTF8_wcsnrtombs;
-	_CurrentRuneLocale = rl;
-	__mb_cur_max = 6;
+	xrl->__mbrtowc = _UTF2_mbrtowc;
+	xrl->__wcrtomb = _UTF2_wcrtomb;
+	xrl->__mbsinit = _UTF2_mbsinit;
+	xrl->__mbsnrtowcs = _UTF2_mbsnrtowcs;
+	xrl->__wcsnrtombs = _UTF2_wcsnrtombs;
+	xrl->__mb_cur_max = UTF2_MB_CUR_MAX;
 
 	return (0);
 }
 
-int
-_UTF8_mbsinit(const mbstate_t *ps)
+static int
+_UTF2_mbsinit(const mbstate_t *ps, locale_t loc)
 {
 
-	return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
+	return (ps == NULL || ((const _UTF2State *)ps)->want == 0);
 }
 
-size_t
-_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
-    mbstate_t * __restrict ps)
+static size_t
+_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
+    mbstate_t * __restrict ps, locale_t loc)
 {
-	_UTF8State *us;
+	_UTF2State *us;
 	int ch, i, mask, want;
 	wchar_t lbound, wch;
 
-	us = (_UTF8State *)ps;
+	us = (_UTF2State *)ps;
 
-	if (us->want < 0 || us->want > 6) {
+	if (us->want < 0 || us->want > 3) {
 		errno = EINVAL;
 		return ((size_t)-1);
 	}
@@ -130,21 +134,9 @@
 			mask = 0x0f;
 			want = 3;
 			lbound = 0x800;
-		} else if ((ch & 0xf8) == 0xf0) {
-			mask = 0x07;
-			want = 4;
-			lbound = 0x10000;
-		} else if ((ch & 0xfc) == 0xf8) {
-			mask = 0x03;
-			want = 5;
-			lbound = 0x200000;
-		} else if ((ch & 0xfc) == 0xfc) {
-			mask = 0x01;
-			want = 6;
-			lbound = 0x4000000;
 		} else {
 			/*
-			 * Malformed input; input is not UTF-8.
+			 * Malformed input; input is not UTF2.
 			 */
 			errno = EILSEQ;
 			return ((size_t)-1);
@@ -194,17 +186,17 @@
 	return (wch == L'\0' ? 0 : want);
 }
 
-size_t
-_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
-    size_t nms, size_t len, mbstate_t * __restrict ps)
+static size_t
+_UTF2_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
+    size_t nms, size_t len, mbstate_t * __restrict ps, locale_t loc)
 {
-	_UTF8State *us;
+	_UTF2State *us;
 	const char *s;
 	size_t nchr;
 	wchar_t wc;
 	size_t nb;
 
-	us = (_UTF8State *)ps;
+	us = (_UTF2State *)ps;
 
 	s = *src;
 	nchr = 0;
@@ -226,7 +218,7 @@
 				 * excluding NUL.
 				 */
 				nb = 1;
-			else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
+			else if ((nb = _UTF2_mbrtowc(&wc, s, nms, ps, loc)) ==
 			    (size_t)-1)
 				/* Invalid sequence - mbrtowc() sets errno. */
 				return ((size_t)-1);
@@ -256,7 +248,7 @@
 			 */
 			*dst = (wchar_t)*s;
 			nb = 1;
-		} else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
+		} else if ((nb = _UTF2_mbrtowc(dst, s, nms, ps, loc)) ==
 		    (size_t)-1) {
 			*src = s;
 			return ((size_t)-1);
@@ -276,14 +268,14 @@
 	return (nchr);
 }
 
-size_t
-_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
+static size_t
+_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps, locale_t loc)
 {
-	_UTF8State *us;
+	_UTF2State *us;
 	unsigned char lead;
 	int i, len;
 
-	us = (_UTF8State *)ps;
+	us = (_UTF2State *)ps;
 
 	if (us->want != 0) {
 		errno = EINVAL;
@@ -315,15 +307,6 @@
 	} else if ((wc & ~0xffff) == 0) {
 		lead = 0xe0;
 		len = 3;
-	} else if ((wc & ~0x1fffff) == 0) {
-		lead = 0xf0;
-		len = 4;
-	} else if ((wc & ~0x3ffffff) == 0) {
-		lead = 0xf8;
-		len = 5;
-	} else if ((wc & ~0x7fffffff) == 0) {
-		lead = 0xfc;
-		len = 6;
 	} else {
 		errno = EILSEQ;
 		return ((size_t)-1);
@@ -344,17 +327,17 @@
 	return (len);
 }
 
-size_t
-_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
-    size_t nwc, size_t len, mbstate_t * __restrict ps)
+static size_t
+_UTF2_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+    size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t loc)
 {
-	_UTF8State *us;
+	_UTF2State *us;
 	char buf[MB_LEN_MAX];
 	const wchar_t *s;
 	size_t nbytes;
 	size_t nb;
 
-	us = (_UTF8State *)ps;
+	us = (_UTF2State *)ps;
 
 	if (us->want != 0) {
 		errno = EINVAL;
@@ -369,7 +352,7 @@
 			if (0 <= *s && *s < 0x80)
 				/* Fast path for plain ASCII characters. */
 				nb = 1;
-			else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
+			else if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) ==
 			    (size_t)-1)
 				/* Invalid character - wcrtomb() sets errno. */
 				return ((size_t)-1);
@@ -386,9 +369,9 @@
 			/* Fast path for plain ASCII characters. */
 			nb = 1;
 			*dst = *s;
-		} else if (len > (size_t)MB_CUR_MAX) {
+		} else if (len > (size_t)UTF2_MB_CUR_MAX) {
 			/* Enough space to translate in-place. */
-			if ((nb = (int)_UTF8_wcrtomb(dst, *s, ps)) < 0) {
+			if ((nb = _UTF2_wcrtomb(dst, *s, ps, loc)) == (size_t)-1) {
 				*src = s;
 				return ((size_t)-1);
 			}
@@ -396,7 +379,7 @@
 			/*
 			 * May not be enough space; use temp. buffer.
 			 */
-			if ((nb = (int)_UTF8_wcrtomb(buf, *s, ps)) < 0) {
+			if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == (size_t)-1) {
 				*src = s;
 				return ((size_t)-1);
 			}