PR-3715846.diff   [plain text]


--- src/dfa.c.orig	2005-05-11 09:24:42.000000000 -0700
+++ src/dfa.c	2005-05-11 09:35:53.000000000 -0700
@@ -707,6 +707,34 @@
   return strncmp(s, lexptr, len) == 0;
 }
 
+/*
+ * Parse ".x.]" or "=x=]" at ct, reading at most ct_len bytes and
+ * stopping at a NUL.  There are currently no public APIs to access
+ * collating symbols or equivalence classes, so the enclosed bytes are
+ * returned as-is in a malloc'd string (caller frees), or NULL if
+ * malformed or out of memory.  *chars_used: bytes used, 0 if malformed.
+ */
+char *try_collating_thing(char const *ct, int ct_len, int *chars_used) {
+    char delim, *t;
+    int n;
+    *chars_used = 0;
+    if (ct_len < 1 || (*ct != '.' && *ct != '='))
+	return NULL;
+    delim = *ct++;		/* only ct_len - 1 bytes remain at ct now */
+    for (n = 0; n + 2 < ct_len && ct[n]; ++n)
+	if (ct[n] == delim && ct[n + 1] == ']')
+	    break;
+    if (n + 2 >= ct_len || !ct[n])	/* ran out of input: no closer */
+	return NULL;
+    *chars_used = n + 3;	/* opener + n bytes + two-byte closer */
+    if ((t = malloc(n + 1)) == NULL)
+	return NULL;
+    memcpy(t, ct, n);
+    t[n] = '\0';
+    return t;
+}
+
+
 static token
 lex (void)
 {
@@ -1014,20 +1042,39 @@
 		 characters.  We can do this because we assume
 		 regex has checked for syntax errors before
 		 dfa is ever called. */
-	      if (c == '[' && (syntax_bits & RE_CHAR_CLASSES))
-		for (c1 = 0; prednames[c1].name; ++c1)
-		  if (looking_at(prednames[c1].name))
-		    {
-		      int (*pred) PARAMS ((int)) = prednames[c1].pred;
-
-		      for (c2 = 0; c2 < NOTCHAR; ++c2)
-			if ((*pred)(c2))
-			  setbit_case_fold (c2, ccl);
-		      lexptr += strlen(prednames[c1].name);
-		      lexleft -= strlen(prednames[c1].name);
-		      FETCH(c1, _("Unbalanced ["));
-		      goto skip;
-		    }
+	      if (c == '[' && (syntax_bits & RE_CHAR_CLASSES)) {
+		if (lexleft >= 1 && (*lexptr == '.' || *lexptr == '=')) {
+		  int used = 0;
+		  char *match = try_collating_thing(lexptr, lexleft, &used);
+		  if (!match) {
+		    dfaerror(_("invalid collating element or class"));
+		  } else {
+		      char *cp = match;
+		      for(; *cp; ++cp) {
+			setbit(*cp, ccl);
+		      }
+		      free(match);
+		  }
+		  lexptr += used;
+		  lexleft -= used;
+		  FETCH(c1, _("Unbalanced ["));
+		  goto skip;
+		} else {
+		  for (c1 = 0; prednames[c1].name; ++c1)
+		    if (looking_at(prednames[c1].name))
+		      {
+			int (*pred) PARAMS ((int)) = prednames[c1].pred;
+
+			for (c2 = 0; c2 < NOTCHAR; ++c2)
+			  if ((*pred)(c2))
+			    setbit_case_fold (c2, ccl);
+			lexptr += strlen(prednames[c1].name);
+			lexleft -= strlen(prednames[c1].name);
+			FETCH(c1, _("Unbalanced ["));
+			goto skip;
+		      }
+		}
+	      }
 	      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
 		FETCH(c, _("Unbalanced ["));
 	      FETCH(c1, _("Unbalanced ["));