regex2.h.patch   [plain text]


--- regex2.h.bsdnew	2009-11-11 11:29:04.000000000 -0800
+++ regex2.h	2009-11-11 12:18:35.000000000 -0800
@@ -120,14 +120,23 @@ typedef struct {
 	int		nranges;
 	int		invert;
 	int		icase;
+	int		*equiv_classes;
+	int		nequiv_classes;
 } cset;
 
+#include "collate.h"
+
 static int
-CHIN1(cset *cs, wint_t ch)
+CHIN1(cset *cs, wint_t ch, locale_t loc)
 {
 	int i;
 
 	assert(ch >= 0);
+	for (i = 0; i < cs->nequiv_classes; i++)
+		/* sadly, we can only deal with single characters from an
+		 * equivalence class */
+		if (__collate_equiv_match(cs->equiv_classes[i], NULL, 0, ch, NULL, 0, NULL, NULL, loc) > 0)
+			return (!cs->invert);
 	if (ch < NC)
 		return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
 		    cs->invert);
@@ -138,24 +147,24 @@ CHIN1(cset *cs, wint_t ch)
 		if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max)
 			return (!cs->invert);
 	for (i = 0; i < cs->ntypes; i++)
-		if (iswctype(ch, cs->types[i]))
+		if (iswctype_l(ch, cs->types[i], loc))
 			return (!cs->invert);
 	return (cs->invert);
 }
 
 static __inline int
-CHIN(cset *cs, wint_t ch)
+CHIN(cset *cs, wint_t ch, locale_t loc)
 {
 
 	assert(ch >= 0);
-	if (ch < NC)
+	if (ch < NC && cs->nequiv_classes == 0)	/* bitmap alone decides only when the set has no equivalence classes */
 		return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
 		    cs->invert);
 	else if (cs->icase)
-		return (CHIN1(cs, ch) || CHIN1(cs, towlower(ch)) ||
-		    CHIN1(cs, towupper(ch)));
+		return (CHIN1(cs, ch, loc) || CHIN1(cs, towlower_l(ch, loc), loc) ||
+		    CHIN1(cs, towupper_l(ch, loc), loc));
 	else
-		return (CHIN1(cs, ch));
+		return (CHIN1(cs, ch, loc));	/* CHIN1 tests equivalence classes before the bitmap */
 }
 
 /*
@@ -185,8 +194,9 @@ struct re_guts {
 	size_t nsub;		/* copy of re_nsub */
 	int backrefs;		/* does it use back references? */
 	sopno nplus;		/* how deep does it nest +s? */
+	locale_t loc;		/* current locale */
 };
 
 /* misc utilities */
-#define	OUT	(CHAR_MIN - 1)	/* a non-character value */
-#define ISWORD(c)       (iswalnum((uch)(c)) || (c) == '_')
+#define	OUT	(CHAR_MIN - 2)	/* a non-character value */
+#define ISWORD(c,l)     (iswalnum_l((uch)(c), l) || (c) == '_')	/* alphanumeric in locale l, or '_' */