strxfrm.c.patch   [plain text]


--- strxfrm.c.orig	2003-05-20 15:23:55.000000000 -0700
+++ strxfrm.c	2005-04-02 17:59:53.000000000 -0800
@@ -28,24 +28,59 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp $");
 
+#include "xlocale_private.h"
+
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
+#include <errno.h>
 #include "collate.h"
 
+/*
+ * In the non-POSIX case, we transform each character into a string of
+ * characters representing the character's priority.  Since char is usually
+ * signed, we are limited by 7 bits per byte.  To avoid zero, we need to add
+ * XFRM_OFFSET, so we can't use a full 7 bits.  For simplicity, we choose 6
+ * bits per byte.  We choose 4 bytes per character as a good compromise
+ * between maximum coverage and minimum size.  This gives 24 bits, or 16M
+ * priorities.  So we choose COLLATE_MAX_PRIORITY to be (2^24 - 1).  This
+ * can be increased if more is needed.
+ */
+
+#define	XFRM_BYTES	4
+#define	XFRM_OFFSET	('0')	/* make all printable characters */
+#define	XFRM_SHIFT	6
+#define	XFRM_MASK	((1 << XFRM_SHIFT) - 1)
+
+static void
+xfrm(unsigned char *p, int pri)
+{
+	int i;
+
+	/* Emit XFRM_BYTES digits of XFRM_SHIFT bits each, biased by XFRM_OFFSET, */
+	/* filling p[] from its last byte so the high-order digit lands in p[0]. */
+	for (i = XFRM_BYTES - 1; i >= 0; i--) {
+		p[i] = (pri & XFRM_MASK) + XFRM_OFFSET;
+		pri >>= XFRM_SHIFT;
+	}
+}
+
 size_t
-strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
+strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len,
+    locale_t loc)
 {
-	int prim, sec, l;
 	size_t slen;
-	char *s, *ss;
+	wchar_t *wcs, *xf[2];
+	int sverrno;
 
-	if (!*src) {
+	if (!*src && dest) {
 		if (len > 0)
 			*dest = '\0';
 		return 0;
 	}
 
-	if (__collate_load_error) {
+	NORMALIZE_LOCALE(loc);
+	if (loc->__collate_load_error || (wcs = __collate_mbstowcs(src, loc)) == NULL) {
 		slen = strlen(src);
 		if (len > 0) {
 			if (slen < len)
@@ -58,26 +93,63 @@
 		return slen;
 	}
 
-	slen = 0;
-	prim = sec = 0;
-	ss = s = __collate_substitute(src);
-	while (*s) {
-		while (*s && !prim) {
-			__collate_lookup(s, &l, &prim, &sec);
-			s += l;
+	__collate_xfrm(wcs, xf, loc);
+
+	slen = wcslen(xf[0]) * XFRM_BYTES;
+	if (xf[1])
+		slen += (wcslen(xf[1]) + 1) * XFRM_BYTES;
+	if (len > 0) {
+		wchar_t *w = xf[0];
+		int b = 0;
+		unsigned char buf[XFRM_BYTES];
+		unsigned char *bp;
+		while (len > 1) {
+			if (!b) {
+				if (!*w)
+					break;
+				xfrm(bp = buf, *w++);
+				b = XFRM_BYTES;
+			}
+			*dest++ = *(char *)bp++;
+			b--;
+			len--;
 		}
-		if (prim) {
-			if (len > 1) {
-				*dest++ = (char)prim;
+		if ((w = xf[1]) != NULL) {
+			xfrm(bp = buf, 0);
+			b = XFRM_BYTES;
+			while (len > 1) {
+				if (!b)
+					break;
+				*dest++ = *(char *)bp++;
+				b--;
+				len--;
+			}
+			b = 0;
+			while (len > 1) {
+				if (!b) {
+					if (!*w)
+						break;
+					xfrm(bp = buf, *w++);
+					b = XFRM_BYTES;
+				}
+				*dest++ = *(char *)bp++;
+				b--;
 				len--;
 			}
-			slen++;
-			prim = 0;
 		}
-	}
-	free(ss);
-	if (len > 0)
-		*dest = '\0';
+		*dest = 0;
+ 	}
+	sverrno = errno;
+	free(wcs);
+	free(xf[0]);
+	free(xf[1]);
+	errno = sverrno;
 
 	return slen;
 }
+
+size_t
+strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
+{	/* Wrapper: forwards to strxfrm_l(), passing __current_locale() as loc. */
+	return strxfrm_l(dest, src, len, __current_locale());
+}