fnmatch.c.patch   [plain text]


--- fnmatch.c.orig	2004-11-25 11:38:00.000000000 -0800
+++ fnmatch.c	2005-03-30 14:33:09.000000000 -0800
@@ -40,6 +40,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.16 2004/07/29 03:13:10 tjr Exp $");
 
+#include "xlocale_private.h"	/* presumably declares locale_t and __current_locale() used by this patch -- confirm */
+
 /*
  * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
  * Compares a filename or pathname to a pattern.
@@ -66,12 +68,15 @@
 
 #define	EOS	'\0'
 
+#if __DARWIN_UNIX03
+#define RETURN_ERROR	2	/* error result: neither 0 nor FNM_NOMATCH */
+#endif /* __DARWIN_UNIX03 */
 #define RANGE_MATCH     1
 #define RANGE_NOMATCH   0
 #define RANGE_ERROR     (-1)
 
-static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
-static int fnmatch1(const char *, const char *, int, mbstate_t, mbstate_t);
+__private_extern__ int rangematch(const char *, wchar_t, const char *, int, char **, char **, mbstate_t *, mbstate_t *, locale_t);
+static int fnmatch1(const char *, const char *, int, mbstate_t, mbstate_t, locale_t);
 
 int
 fnmatch(pattern, string, flags)
@@ -80,27 +85,32 @@
 {
 	static const mbstate_t initial;
 
-	return (fnmatch1(pattern, string, flags, initial, initial));
+	return (fnmatch1(pattern, string, flags, initial, initial, __current_locale()));	/* locale obtained once here and handed down to fnmatch1() */
 }
 
 static int
-fnmatch1(pattern, string, flags, patmbs, strmbs)
+fnmatch1(pattern, string, flags, patmbs, strmbs, loc)
 	const char *pattern, *string;
 	int flags;
 	mbstate_t patmbs, strmbs;
+	locale_t loc;	/* locale passed to the *_l decoding / case-folding calls and to rangematch() */
 {
 	const char *stringstart;
-	char *newp;
+	char *newp, *news;	/* pattern / string positions handed back by rangematch() */
 	char c;
 	wchar_t pc, sc;
 	size_t pclen, sclen;
 
 	for (stringstart = string;;) {
-		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
+		pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc);
 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
+#if __DARWIN_UNIX03
+			return (RETURN_ERROR);	/* undecodable pattern is reported as an error, not a mismatch */
+#else /* !__DARWIN_UNIX03 */
 			return (FNM_NOMATCH);
+#endif /* __DARWIN_UNIX03 */
 		pattern += pclen;
-		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
+		sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc);
 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
 			sc = (unsigned char)*string;
 			sclen = 1;
@@ -150,10 +160,10 @@
 			/* General case, use recursion. */
 			while (sc != EOS) {
 				if (!fnmatch1(pattern, string,
-				    flags & ~FNM_PERIOD, patmbs, strmbs))
+				    flags & ~FNM_PERIOD, patmbs, strmbs, loc))	/* recurse under the same locale */
 					return (0);
-				sclen = mbrtowc(&sc, string, MB_LEN_MAX,
-				    &strmbs);
+				sclen = mbrtowc_l(&sc, string, MB_LEN_MAX,
+				    &strmbs, loc);
 				if (sclen == (size_t)-1 ||
 				    sclen == (size_t)-2) {
 					sc = (unsigned char)*string;
@@ -175,35 +185,45 @@
 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
 				return (FNM_NOMATCH);
 
-			switch (rangematch(pattern, sc, flags, &newp,
-			    &patmbs)) {
+			switch (rangematch(pattern, sc, string + sclen, flags,	/* also hand over the text that follows sc */
+			    &newp, &news, &patmbs, &strmbs, loc)) {
 			case RANGE_ERROR:
+#if __DARWIN_UNIX03
+				return (RETURN_ERROR);	/* RANGE_ERROR surfaces as an error instead of matching '[' literally */
+#else /* !__DARWIN_UNIX03 */
 				goto norm;
+#endif /* __DARWIN_UNIX03 */
 			case RANGE_MATCH:
 				pattern = newp;
+				string = news;	/* rangematch() reports where matching resumes in string */
 				break;
 			case RANGE_NOMATCH:
 				return (FNM_NOMATCH);
 			}
-			string += sclen;
 			break;
 		case '\\':
 			if (!(flags & FNM_NOESCAPE)) {
-				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
-				    &patmbs);
+				pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX,
+				    &patmbs, loc);
 				if (pclen == (size_t)-1 || pclen == (size_t)-2)
+#if __DARWIN_UNIX03
+					return (RETURN_ERROR);	/* undecodable escaped character is an error */
+#else /* !__DARWIN_UNIX03 */
 					return (FNM_NOMATCH);
+#endif /* __DARWIN_UNIX03 */
 				if (pclen == 0)
 					pc = '\\';
 				pattern += pclen;
 			}
 			/* FALLTHROUGH */
 		default:
+#if !__DARWIN_UNIX03	/* the only goto norm visible in this patch is in the !UNIX03 branch above */
 		norm:
+#endif /* !__DARWIN_UNIX03 */
 			if (pc == sc)
 				;
 			else if ((flags & FNM_CASEFOLD) &&
-				 (towlower(pc) == towlower(sc)))
+				 (towlower_l(pc, loc) == towlower_l(sc, loc)))
 				;
 			else
 				return (FNM_NOMATCH);
@@ -214,18 +234,22 @@
 	/* NOTREACHED */
 }
 
-static int
-rangematch(pattern, test, flags, newp, patmbs)
-	const char *pattern;
+#ifndef BUILDING_VARIANT
+__private_extern__ int
+rangematch(pattern, test, string, flags, newp, news, patmbs, strmbs, loc)
+	const char *pattern, *string;	/* string: subject text following the character in test */
 	wchar_t test;
 	int flags;
-	char **newp;
-	mbstate_t *patmbs;
+	char **newp, **news;	/* out: resume positions in pattern and string */
+	mbstate_t *patmbs, *strmbs;	/* conversion states for pattern and string */
+	locale_t loc;
 {
-	int negate, ok;
+	int negate, ok, special;	/* special: '.', '=' or ':' of the current "[x" element */
 	wchar_t c, c2;
-	size_t pclen;
-	const char *origpat;
+	wchar_t buf[STR_LEN];	/* STR_LEN defined in collate.h */
+	size_t pclen, sclen, len;
+	const char *origpat, *cp, *savestring;
+	mbstate_t save;	/* snapshot of a conversion state so it can be rolled back */
 
 	/*
 	 * A bracket expression starting with an unquoted circumflex
@@ -238,7 +262,7 @@
 		++pattern;
 
 	if (flags & FNM_CASEFOLD)
-		test = towlower(test);
+		test = towlower_l(test, loc);	/* fold using the caller-supplied locale */
 
 	/*
 	 * A right bracket shall lose its special meaning and represent
@@ -248,8 +272,8 @@
 	ok = 0;
 	origpat = pattern;
 	for (;;) {
+		c = 0;	/* zero means no element character has been obtained yet in this iteration */
 		if (*pattern == ']' && pattern > origpat) {
-			pattern++;
 			break;
 		} else if (*pattern == '\0') {
 			return (RANGE_ERROR);
@@ -258,39 +282,188 @@
 			return (RANGE_NOMATCH);
 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
 			pattern++;
-		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
-		if (pclen == (size_t)-1 || pclen == (size_t)-2)
-			return (RANGE_NOMATCH);
-		pattern += pclen;
+		else if (*pattern == '[' && ((special = *(pattern + 1)) == '.' || special == '=' || special == ':')) {
+			cp = (pattern += 2);
+			while ((cp = strchr(cp, special)) != NULL) {	/* locate the closing ".]", "=]" or ":]" */
+				if (*(cp + 1) == ']')
+					break;
+				cp++;
+			}
+			if (!cp)
+				return (RANGE_ERROR);
+			if (special == '.') {
+treat_like_collating_symbol:
+				len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc);
+				if (len == (size_t)-1 || len == 0)
+					return (RANGE_ERROR);
+				pattern = cp + 2;
+				if (len > 1) {
+					wchar_t *wp, sc;
+					/* no multi-character collation symbols as start of range */
+					if (*(cp + 2) == '-' && *(cp + 3) != EOS
+					    && *(cp + 3) != ']')
+						return (RANGE_ERROR);
+					wp = buf;
+					if (test != *wp++)
+						continue;
+					if (len == 1) {	/* NOTE(review): unreachable, len > 1 in this branch */
+						ok = 1;
+						break;
+					}
+					memcpy(&save, strmbs, sizeof(save));	/* remember state so a partial match can be undone */
+					savestring = string;
+					while (--len > 0) {
+						sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, strmbs, loc);
+						if (sclen == (size_t)-1 || sclen == (size_t)-2) {
+							sc = (unsigned char)*string;
+							sclen = 1;
+							memset(strmbs, 0, sizeof(*strmbs));	/* reset the state object, not the pointer to it */
+						}
+						if (sc != *wp++) {
+							memcpy(strmbs, &save, sizeof(save));
+							string = savestring;
+							break;
+						}
+						string += sclen;
+					}
+					if (len == 0) {
+						ok = 1;
+						break;
+					}
+					continue; /* no match */
+				}
+				c = *buf;	/* single-character symbol: handled below like an ordinary element */
+			} else if (special == '=') {
+				int ec;
+				memcpy(&save, patmbs, sizeof(save));
+				ec = __collate_equiv_class(pattern, cp - pattern, patmbs, loc);
+				if (ec < 0)
+					return (RANGE_ERROR);
+				if (ec == 0) {
+					memcpy(patmbs, &save, sizeof(save));
+					goto treat_like_collating_symbol;
+				}
+				pattern = cp + 2;
+				/* no equivalence classes as start of range */
+				if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
+				    *(cp + 3) != ']')
+					return (RANGE_ERROR);
+				len = __collate_equiv_match(ec, NULL, 0, test, string, strlen(string), strmbs, &sclen, loc);
+				if (len == (size_t)-1)	/* len is size_t, so "< 0" could never fire; assumes failure is reported as -1 -- confirm */
+					return (RANGE_ERROR);
+				if (len > 0) {
+					ok = 1;
+					string += sclen;
+					break;
+				}
+				continue;
+			} else { /* special == ':' */
+				wctype_t charclass;
+				char name[CHARCLASS_NAME_MAX + 1];
+				/* no character classes as start of range */
+				if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
+				    *(cp + 3) != ']')
+					return (RANGE_ERROR);
+				/* assume character class names are ascii */
+				if (cp - pattern > CHARCLASS_NAME_MAX)
+					return (RANGE_ERROR);
+				strlcpy(name, pattern, cp - pattern + 1);
+				pattern = cp + 2;
+				if ((charclass = wctype(name)) == 0)
+					return (RANGE_ERROR);
+				if (iswctype_l(test, charclass, loc)) {
+					ok = 1;
+					break;
+				}
+				continue;
+			}
+		}
+		if (!c) {	/* no collating symbol supplied c: decode the next pattern character */
+			pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc);
+			if (pclen == (size_t)-1 || pclen == (size_t)-2)
+				return (RANGE_ERROR);
+			pattern += pclen;
+		}
 
 		if (flags & FNM_CASEFOLD)
-			c = towlower(c);
+			c = towlower_l(c, loc);
 
 		if (*pattern == '-' && *(pattern + 1) != EOS &&
 		    *(pattern + 1) != ']') {
 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
 				if (*pattern != EOS)
 					pattern++;
-			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
+			pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs, loc);
 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
-				return (RANGE_NOMATCH);
+				return (RANGE_ERROR);
 			pattern += pclen;
 			if (c2 == EOS)
 				return (RANGE_ERROR);
 
+			if (c2 == '[' && ((special = *pattern) == '.' || special == '=' || special == ':')) {	/* parenthesized so special is only tested after '[' */
+				/* no equivalence classes or character classes as end of range */
+				if (special == '=' || special == ':')
+					return (RANGE_ERROR);
+				cp = ++pattern;
+				while ((cp = strchr(cp, special)) != NULL) {	/* locate the closing ".]" */
+					if (*(cp + 1) == ']')
+						break;
+					cp++;
+				}
+				if (!cp)
+					return (RANGE_ERROR);
+				len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc);
+				/* no multi-character collation symbols as end of range */
+				if (len != 1)
+					return (RANGE_ERROR);
+				pattern = cp + 2;
+				c2 = *buf;
+			}
+
 			if (flags & FNM_CASEFOLD)
-				c2 = towlower(c2);
+				c2 = towlower_l(c2, loc);
 
-			if (__collate_load_error ?
+			if (loc->__collate_load_error ?
 			    c <= test && test <= c2 :
-			       __collate_range_cmp(c, test) <= 0
-			    && __collate_range_cmp(test, c2) <= 0
-			   )
+			       __collate_range_cmp(c, test, loc) <= 0
+			    && __collate_range_cmp(test, c2, loc) <= 0
+			   ) {
 				ok = 1;
-		} else if (c == test)
+				break;
+			}
+		} else if (c == test) {
 			ok = 1;
+			break;
+		}
 	}
+	/* go to end of bracket expression */
+	special = 0;
+	while(*pattern != ']') {
+		if (*pattern == 0)
+			return (RANGE_ERROR);
+		if (*pattern == special) {
+			if (*++pattern == ']') {
+				special = 0;
+				pattern++;
+			}
+			continue;
+		}
+		if (!special && *pattern == '[') {
+			special = *++pattern;
+			if (special != '.' && special != '=' && special != ':')
+				special = 0;
+			else
+				pattern++;
+			continue;
+		}
+		pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc);
+		if (pclen == (size_t)-1 || pclen == (size_t)-2)
+			return (RANGE_ERROR);
+		pattern += pclen;
+	}
 
-	*newp = (char *)pattern;
+	*newp = (char *)++pattern;	/* step past the closing ']' */
+	*news = (char *)string;	/* string may have advanced past a multi-character match */
 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
 }
+#endif /* BUILDING_VARIANT */