--- src/dfa.c.orig 2005-05-11 09:24:42.000000000 -0700 +++ src/dfa.c 2005-05-11 09:35:53.000000000 -0700 @@ -707,6 +707,34 @@ return strncmp(s, lexptr, len) == 0; } +/* + * there are currently no public APIs to access collating symbols or + * equivalence classes. + */ +char *try_collating_thing(char const *ct, int ct_len, int *chars_used) { + char *term; + if (*ct == '.') { + term = strnstr(++ct, ".]", ct_len); + } else if (*ct == '=') { + term = strnstr(++ct, "=]", ct_len); + } else { + term = NULL; + } + if (!term) { + *chars_used = 0; + return NULL; + } + int slen = 1 + (term - ct); + *chars_used = 2 + slen; + char *t = malloc(slen); + if (!t) { + return NULL; + } + strlcpy(t, ct, slen); + return t; +} + + static token lex (void) { @@ -1014,20 +1042,39 @@ characters. We can do this because we assume regex has checked for syntax errors before dfa is ever called. */ - if (c == '[' && (syntax_bits & RE_CHAR_CLASSES)) - for (c1 = 0; prednames[c1].name; ++c1) - if (looking_at(prednames[c1].name)) - { - int (*pred) PARAMS ((int)) = prednames[c1].pred; - - for (c2 = 0; c2 < NOTCHAR; ++c2) - if ((*pred)(c2)) - setbit_case_fold (c2, ccl); - lexptr += strlen(prednames[c1].name); - lexleft -= strlen(prednames[c1].name); - FETCH(c1, _("Unbalanced [")); - goto skip; - } + if (c == '[' && (syntax_bits & RE_CHAR_CLASSES)) { + if (lexleft >= 1 && (*lexptr == '.' || *lexptr == '=')) { + int used = 0; + char *match = try_collating_thing(lexptr, lexleft, &used); + if (!match) { + dfaerror(_("invalid collating element or class")); + } else { + char *cp = match; + for(; *cp; ++cp) { + setbit(*cp, ccl); + } + free(match); + } + lexptr += used; + lexleft -= used; + FETCH(c1, _("Unbalanced [")); + goto skip; + } else { + for (c1 = 0; prednames[c1].name; ++c1) + if (looking_at(prednames[c1].name)) + { + int (*pred) PARAMS ((int)) = prednames[c1].pred; + + for (c2 = 0; c2 < NOTCHAR; ++c2) + if ((*pred)(c2)) + setbit_case_fold (c2, ccl); + lexptr += strlen(prednames[c1].name); + lexleft -= strlen(prednames[c1].name); + FETCH(c1, _("Unbalanced [")); + goto skip; + } + } + } if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) FETCH(c, _("Unbalanced [")); FETCH(c1, _("Unbalanced ["));