utf2.c.patch   [plain text]


--- utf2.c.orig	Fri Feb 18 15:49:55 2005
+++ utf2.c	Fri Feb 18 15:52:07 2005
@@ -25,8 +25,11 @@
  */
 
 #include <sys/param.h>
+/* UTF2: UTF-8 restricted to sequences of at most three bytes (wide chars up to 0xffff) */
 __FBSDID("$FreeBSD: src/lib/libc/locale/utf8.c,v 1.11 2004/07/27 06:29:48 tjr Exp $");
 
+#include "xlocale_private.h"
+
 #include <errno.h>
 #include <limits.h>
 #include <runetype.h>
@@ -35,54 +38,55 @@
 #include <wchar.h>
 #include "mblocal.h"
 
-size_t	_UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
-	    mbstate_t * __restrict);
-int	_UTF8_mbsinit(const mbstate_t *);
-size_t	_UTF8_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict,
-	    size_t, size_t, mbstate_t * __restrict);
-size_t	_UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
-size_t	_UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
-	    size_t, size_t, mbstate_t * __restrict);
+#define UTF2_MB_CUR_MAX		3
+
+static size_t	_UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
+	    mbstate_t * __restrict, locale_t);
+static int	_UTF2_mbsinit(const mbstate_t *, locale_t);
+static size_t	_UTF2_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict,
+	    size_t, size_t, mbstate_t * __restrict, locale_t);
+static size_t	_UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict, locale_t);
+static size_t	_UTF2_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
+	    size_t, size_t, mbstate_t * __restrict, locale_t);
 
 typedef struct {
 	wchar_t	ch;
 	int	want;
 	wchar_t	lbound;
-} _UTF8State;
+} _UTF2State;
 
-int
-_UTF8_init(_RuneLocale *rl)
+__private_extern__ int
+_UTF2_init(struct __xlocale_st_runelocale *xrl)
 {
 
-	__mbrtowc = _UTF8_mbrtowc;
-	__wcrtomb = _UTF8_wcrtomb;
-	__mbsinit = _UTF8_mbsinit;
-	__mbsnrtowcs = _UTF8_mbsnrtowcs;
-	__wcsnrtombs = _UTF8_wcsnrtombs;
-	_CurrentRuneLocale = rl;
-	__mb_cur_max = 6;
+	xrl->__mbrtowc = _UTF2_mbrtowc;
+	xrl->__wcrtomb = _UTF2_wcrtomb;
+	xrl->__mbsinit = _UTF2_mbsinit;
+	xrl->__mbsnrtowcs = _UTF2_mbsnrtowcs;
+	xrl->__wcsnrtombs = _UTF2_wcsnrtombs;
+	xrl->__mb_cur_max = UTF2_MB_CUR_MAX;
 
 	return (0);
 }
 
-int
-_UTF8_mbsinit(const mbstate_t *ps)
+static int
+_UTF2_mbsinit(const mbstate_t *ps, locale_t loc)
 {
 
-	return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
+	return (ps == NULL || ((const _UTF2State *)ps)->want == 0);
 }
 
-size_t
-_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
-    mbstate_t * __restrict ps)
+static size_t
+_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
+    mbstate_t * __restrict ps, locale_t loc)
 {
-	_UTF8State *us;
+	_UTF2State *us;
 	int ch, i, mask, want;
 	wchar_t lbound, wch;
 
-	us = (_UTF8State *)ps;
+	us = (_UTF2State *)ps;
 
-	if (us->want < 0 || us->want > 6) {
+	if (us->want < 0 || us->want > 3) {
 		errno = EINVAL;
 		return ((size_t)-1);
 	}
@@ -130,21 +134,9 @@
 			mask = 0x0f;
 			want = 3;
 			lbound = 0x800;
-		} else if ((ch & 0xf8) == 0xf0) {
-			mask = 0x07;
-			want = 4;
-			lbound = 0x10000;
-		} else if ((ch & 0xfc) == 0xf8) {
-			mask = 0x03;
-			want = 5;
-			lbound = 0x200000;
-		} else if ((ch & 0xfc) == 0xfc) {
-			mask = 0x01;
-			want = 6;
-			lbound = 0x4000000;
 		} else {
 			/*
-			 * Malformed input; input is not UTF-8.
+			 * Malformed input; input is not UTF2.
 			 */
 			errno = EILSEQ;
 			return ((size_t)-1);
@@ -194,17 +186,17 @@
 	return (wch == L'\0' ? 0 : want);
 }
 
-size_t
-_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
-    size_t nms, size_t len, mbstate_t * __restrict ps)
+static size_t
+_UTF2_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
+    size_t nms, size_t len, mbstate_t * __restrict ps, locale_t loc)
 {
-	_UTF8State *us;
+	_UTF2State *us;
 	const char *s;
 	size_t nchr;
 	wchar_t wc;
 	size_t nb;
 
-	us = (_UTF8State *)ps;
+	us = (_UTF2State *)ps;
 
 	s = *src;
 	nchr = 0;
@@ -226,7 +218,7 @@
 				 * excluding NUL.
 				 */
 				nb = 1;
-			else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
+			else if ((nb = _UTF2_mbrtowc(&wc, s, nms, ps, loc)) ==
 			    (size_t)-1)
 				/* Invalid sequence - mbrtowc() sets errno. */
 				return ((size_t)-1);
@@ -256,7 +248,7 @@
 			 */
 			*dst = (wchar_t)*s;
 			nb = 1;
-		} else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
+		} else if ((nb = _UTF2_mbrtowc(dst, s, nms, ps, loc)) ==
 		    (size_t)-1) {
 			*src = s;
 			return ((size_t)-1);
@@ -276,14 +268,14 @@
 	return (nchr);
 }
 
-size_t
-_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
+static size_t
+_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps, locale_t loc)
 {
-	_UTF8State *us;
+	_UTF2State *us;
 	unsigned char lead;
 	int i, len;
 
-	us = (_UTF8State *)ps;
+	us = (_UTF2State *)ps;
 
 	if (us->want != 0) {
 		errno = EINVAL;
@@ -315,15 +307,6 @@
 	} else if ((wc & ~0xffff) == 0) {
 		lead = 0xe0;
 		len = 3;
-	} else if ((wc & ~0x1fffff) == 0) {
-		lead = 0xf0;
-		len = 4;
-	} else if ((wc & ~0x3ffffff) == 0) {
-		lead = 0xf8;
-		len = 5;
-	} else if ((wc & ~0x7fffffff) == 0) {
-		lead = 0xfc;
-		len = 6;
 	} else {
 		errno = EILSEQ;
 		return ((size_t)-1);
@@ -344,17 +327,17 @@
 	return (len);
 }
 
-size_t
-_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
-    size_t nwc, size_t len, mbstate_t * __restrict ps)
+static size_t
+_UTF2_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+    size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t loc)
 {
-	_UTF8State *us;
+	_UTF2State *us;
 	char buf[MB_LEN_MAX];
 	const wchar_t *s;
 	size_t nbytes;
 	size_t nb;
 
-	us = (_UTF8State *)ps;
+	us = (_UTF2State *)ps;
 
 	if (us->want != 0) {
 		errno = EINVAL;
@@ -369,7 +352,7 @@
 			if (0 <= *s && *s < 0x80)
 				/* Fast path for plain ASCII characters. */
 				nb = 1;
-			else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
+			else if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) ==
 			    (size_t)-1)
 				/* Invalid character - wcrtomb() sets errno. */
 				return ((size_t)-1);
@@ -386,9 +369,9 @@
 			/* Fast path for plain ASCII characters. */
 			nb = 1;
 			*dst = *s;
-		} else if (len > (size_t)MB_CUR_MAX) {
+		} else if (len > (size_t)UTF2_MB_CUR_MAX) {
 			/* Enough space to translate in-place. */
-			if ((nb = (int)_UTF8_wcrtomb(dst, *s, ps)) < 0) {
+			if ((nb = _UTF2_wcrtomb(dst, *s, ps, loc)) == (size_t)-1) {
 				*src = s;
 				return ((size_t)-1);
 			}
@@ -396,7 +379,7 @@
 			/*
 			 * May not be enough space; use temp. buffer.
 			 */
-			if ((nb = (int)_UTF8_wcrtomb(buf, *s, ps)) < 0) {
+			if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == (size_t)-1) {
 				*src = s;
 				return ((size_t)-1);
 			}