#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include "xmalloc.h"
#include "tre-mem.h"
#include "tre-ast.h"
#include "tre-stack.h"
#include "tre-parse.h"
#define BSD_COMPATIBILITY
#ifdef BSD_COMPATIBILITY
#include "cname.h"
#define ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
#endif
/* Wide-character literals for the characters that the parser in this file
   compares against when scanning a regex. */
#define CHAR_PIPE L'|'
#define CHAR_LPAREN L'('
#define CHAR_RPAREN L')'
#define CHAR_LBRACE L'{'
#define CHAR_RBRACE L'}'
#define CHAR_LBRACKET L'['
#define CHAR_RBRACKET L']'
#define CHAR_MINUS L'-'
#define CHAR_STAR L'*'
#define CHAR_QUESTIONMARK L'?'
#define CHAR_PLUS L'+'
#define CHAR_PERIOD L'.'
#define CHAR_COLON L':'
#define CHAR_EQUAL L'='
#define CHAR_COMMA L','
#define CHAR_CARET L'^'
#define CHAR_DOLLAR L'$'
#define CHAR_BACKSLASH L'\\'
#define CHAR_HASH L'#'
#define CHAR_TILDE L'~'
/* Backslash macro table.  Each entry maps the single character that follows
   a backslash to the regex text it expands to.  The table ends with an entry
   whose expansion is NULL. */
static const struct tre_macro_struct {
  const char c;
  const char *expansion;
} tre_macros[] = {
  { .c = 't', .expansion = "\t" },
  { .c = 'n', .expansion = "\n" },
  { .c = 'r', .expansion = "\r" },
  { .c = 'f', .expansion = "\f" },
  { .c = 'a', .expansion = "\a" },
  { .c = 'e', .expansion = "\033" },
  { .c = 'w', .expansion = "[[:alnum:]_]" },
  { .c = 'W', .expansion = "[^[:alnum:]_]" },
  { .c = 's', .expansion = "[[:space:]]" },
  { .c = 'S', .expansion = "[^[:space:]]" },
  { .c = 'd', .expansion = "[[:digit:]]" },
  { .c = 'D', .expansion = "[^[:digit:]]" },
  { .c = 0, .expansion = NULL }
};
/* Expands the macro named by the character at `regex' into `buf'.

   regex, regex_end: the macro character and the end of the pattern.
   buf, buf_len:     output buffer and its capacity in elements, including
                     room for the terminating zero.

   On return `buf' holds the zero-terminated expansion, truncated if it does
   not fit, or an empty string when `regex' is at the end of the pattern or
   names no macro in tre_macros.  Nothing is written when buf_len is 0. */
static void
tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
                 tre_char_t *buf, size_t buf_len)
{
  int i;

  /* Without room for even the terminator there is nothing safe to write. */
  if (buf_len == 0)
    return;

  buf[0] = 0;
  if (regex >= regex_end)
    return;

  for (i = 0; tre_macros[i].expansion; i++)
    {
      if (tre_macros[i].c == *regex)
        {
          size_t j;
          DPRINT(("Expanding macro '%c' => '%s'\n",
                  tre_macros[i].c, tre_macros[i].expansion));
          /* Stop one element early so the terminator below always lands
             inside the buffer. */
          for (j = 0; tre_macros[i].expansion[j] && j + 1 < buf_len; j++)
            buf[j] = tre_macros[i].expansion[j];
          buf[j] = 0;
          break;
        }
    }
}
/* Appends a (type, val) entry to the bracket match list `*items'.

   mem:    unused.
   max_i:  in/out capacity of the list in entries.  It is updated only after
           the list has actually been enlarged.
   items:  in/out pointer to the list; replaced when the list is reallocated.

   When the list is full its capacity is doubled, unless the capacity has
   already reached 1024 entries.  Returns REG_OK on success, or REG_ESPACE
   when the list may not grow further or reallocation fails; in both failure
   cases `*items' and `*max_i' are left unchanged. */
static reg_errcode_t
tre_new_item(tre_mem_t __unused mem, int type, int val, int *max_i,
             tre_bracket_match_list_t **items)
{
  tre_bracket_match_list_t *array = *items;
  int i = array->num_bracket_matches;

  if (i >= *max_i)
    {
      tre_bracket_match_list_t *new_items;
      int new_max;

      DPRINT(("out of tre_bracket_match_list_t array space (%d)\n", i));
      if (*max_i >= 1024)
        return REG_ESPACE;

      new_max = *max_i * 2;
      /* Doubling a zero (or negative) capacity would not make room for
         slot i. */
      if (new_max <= i)
        new_max = i + 1;

      new_items = xrealloc(array, SIZEOF_BRACKET_MATCH_LIST_N(new_max));
      if (new_items == NULL)
        return REG_ESPACE;

      /* Commit the new capacity only once the allocation exists. */
      *max_i = new_max;
      *items = array = new_items;
    }

  array->bracket_matches[i].type = type;
  array->bracket_matches[i].value = val;
  array->num_bracket_matches++;
  return REG_OK;
}
#ifndef TRE_USE_SYSTEM_WCTYPE
/* Real functions wrapping the tre_is*() character classification
   primitives, so that their addresses can be stored in tre_ctype_map.
   tre_isascii and tre_isblank get a local fallback when no macro of that
   name is defined. */

int
tre_isalnum_func(tre_cint_t c)
{
  return tre_isalnum(c);
}

int
tre_isalpha_func(tre_cint_t c)
{
  return tre_isalpha(c);
}

int
tre_isascii_func(tre_cint_t c)
{
#ifdef tre_isascii
  return tre_isascii(c);
#else
  /* Fallback: true when no bits above the low seven are set. */
  return !(c >> 7);
#endif
}

int
tre_isblank_func(tre_cint_t c)
{
#ifdef tre_isblank
  return tre_isblank(c);
#else
  /* Fallback: space and horizontal tab only. */
  return ((c == ' ') || (c == '\t'));
#endif
}

int
tre_iscntrl_func(tre_cint_t c)
{
  return tre_iscntrl(c);
}

int
tre_isdigit_func(tre_cint_t c)
{
  return tre_isdigit(c);
}

int
tre_isgraph_func(tre_cint_t c)
{
  return tre_isgraph(c);
}

int
tre_islower_func(tre_cint_t c)
{
  return tre_islower(c);
}

int
tre_isprint_func(tre_cint_t c)
{
  return tre_isprint(c);
}

int
tre_ispunct_func(tre_cint_t c)
{
  return tre_ispunct(c);
}

int
tre_isspace_func(tre_cint_t c)
{
  return tre_isspace(c);
}

int
tre_isupper_func(tre_cint_t c)
{
  return tre_isupper(c);
}

int
tre_isxdigit_func(tre_cint_t c)
{
  return tre_isxdigit(c);
}
struct {
char *name;
int (*func)(tre_cint_t);
} tre_ctype_map[] = {
{ "alnum", &tre_isalnum_func },
{ "alpha", &tre_isalpha_func },
#ifdef tre_isascii
{ "ascii", &tre_isascii_func },
#endif
#ifdef tre_isblank
{ "blank", &tre_isblank_func },
#endif
{ "cntrl", &tre_iscntrl_func },
{ "digit", &tre_isdigit_func },
{ "graph", &tre_isgraph_func },
{ "lower", &tre_islower_func },
{ "print", &tre_isprint_func },
{ "punct", &tre_ispunct_func },
{ "space", &tre_isspace_func },
{ "upper", &tre_isupper_func },
{ "xdigit", &tre_isxdigit_func },
{ NULL, NULL}
};
/* Looks up a character class by name in tre_ctype_map.
   Returns the matching classification function, or (tre_ctype_t)0 when no
   entry has that exact name. */
tre_ctype_t tre_ctype(const char *name)
{
  int idx = 0;

  while (tre_ctype_map[idx].name != NULL)
    {
      if (!strcmp(name, tre_ctype_map[idx].name))
        return tre_ctype_map[idx].func;
      idx++;
    }

  return (tre_ctype_t)0;
}
#endif
/* Expands to TWO comma-separated arguments: the number of characters left
   between `re' and ctx->re_end, and `re' itself.  Used with the "%.*" debug
   formats in DPRINT calls; requires a variable named `ctx' in scope. */
#define REST(re) (int)(ctx->re_end - (re)), (re)
/* Initial number of collating-symbol slots allocated by
   tre_parse_bracket_items (two extra slots are added to the allocation). */
#define START_COLLATING_SYMBOLS 16
/* NOTE(review): not referenced in the visible part of this file; presumably
   an upper bound on the length of a collating symbol -- confirm. */
#define MAX_COLLATING_SYMBOL_LEN 4
/* One multi-character collating symbol found inside a bracket expression:
   `start' points at its first character in the pattern and `len' is its
   length.  In the array built by tre_parse_bracket_items, element 0 is used
   as a header instead (`start' = position just after the closing ']',
   `len' = number of other items seen) and the array ends with an element
   whose `start' is NULL. */
typedef struct {
const tre_char_t *start;
int len;
} tre_collating_symbol;
#include <xlocale.h>
int __collate_equiv_value(locale_t loc, const wchar_t *str, size_t len);
#ifdef BSD_COMPATIBILITY
/* Binary-searches the cnames[] table for the character name made of the
   first `len' characters of `name'.

   Returns the code of the matching entry, or (wchar_t)-1 when no entry has
   exactly that name.

   The search uses a half-open interval [low, high).  With unsigned indices a
   closed interval underflows when the name sorts before the first entry
   (high = 0 - 1 wraps to SIZE_MAX), which would make the next probe read far
   outside the table. */
static wchar_t
tre_search_cnames(const wchar_t *name, size_t len)
{
  size_t low = 0;
  size_t high = NCNAMES;	/* one past the last candidate */

  while (low < high)
    {
      /* Same probe position as (low + last) / 2 with last = high - 1. */
      size_t cur = low + (high - 1 - low) / 2;
      int cmp = wcsncmp(name, cnames[cur].name, len);

      /* Equal over `len' characters is a match only if the table entry
         ends there as well. */
      if (cmp == 0 && cnames[cur].name[len] == 0)
        return cnames[cur].code;

      if (cmp > 0)
        low = cur + 1;
      else
        high = cur;
    }
  return (wchar_t)-1;
}
#endif
/* Parses the items of a bracket expression, starting at ctx->re (just after
   the opening '[' and any '^') and stopping at the closing ']'.

   ctx:        parse context; ctx->re is advanced past the ']' only on the
               ordinary (non collating-symbol) success path.
   items:      in/out bracket match list that receives the parsed items; may
               be reallocated by tre_new_item.
   items_size: in/out capacity of `*items'; written back only on the ordinary
               success path.
   result:     set only when a multi-character collating symbol was found.
               It then receives a heap array (caller frees with xfree) in
               which element 0 is a header (start = position after ']',
               len = number of other items), followed by one element per
               collating symbol and a terminator with start == NULL.  In that
               mode no items are added to `*items' from the point the symbol
               was seen and ctx->re is left untouched.

   Returns REG_OK, or REG_EBRACK / REG_ERANGE / REG_ECOLLATE / REG_ECTYPE /
   REG_ESPACE on error.  Any collating symbol array is freed on error.

   State carried by `range':
     -1  nothing pending,
      0  the character in `min' is pending and may begin a range,
      1  "min-" has been seen and a range end point is expected. */
static reg_errcode_t
tre_parse_bracket_items(tre_parse_ctx_t *ctx, tre_bracket_match_list_t **items,
                        int *items_size, tre_collating_symbol **result)
{
  const tre_char_t *re = ctx->re;
  const tre_char_t *re_end = ctx->re_end;
  tre_collating_symbol *col_syms = NULL;
  tre_collating_symbol *cp = NULL;
  int n_col_syms = 0;
  reg_errcode_t status;
  int max_i = *items_size;
  int other = 0;
  int range = -1;
  tre_cint_t min = 0, c;
  int invert = ((*items)->flags & TRE_BRACKET_MATCH_FLAG_NEGATE);
  int collect_MCCS = 0;
  const tre_char_t *start;

  for ( ;re < re_end; re++)
    {
      switch (*re)
        {
        case CHAR_MINUS:
          /* A '-' in first position is an ordinary character. */
          if (re == ctx->re)
            {
              DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
              min = CHAR_MINUS;
              other++;
              range = 0;
              break;
            }
          /* A '-' right after a range operator is the range end point. */
          if (range > 0)
            {
              DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
              c = CHAR_MINUS;
              goto process_end_range;
            }
          if (re + 1 >= re_end)
            {
              status = REG_EBRACK;
              goto error;
            }
          /* A '-' just before the closing ']' is an ordinary character. */
          if (re[1] == CHAR_RBRACKET)
            {
              DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
              c = CHAR_MINUS;
              goto process_begin_range;
            }
          /* Otherwise it is a range operator and needs a start point. */
          if (range < 0)
            {
              status = REG_ERANGE;
              goto error;
            }
          range = 1;
          DPRINT(("tre_parse_bracket: range: '%.*" STRF "'\n", REST(re)));
          break;

        case CHAR_LBRACKET:
          if (re + 1 >= re_end)
            {
              status = REG_EBRACK;
              goto error;
            }
          switch (re[1])
            {
            case CHAR_PERIOD:
              {
                /* Collating symbol "[.name.]". */
                re += 2;
                start = re;
                for (;; re++)
                  {
                    if (re >= re_end)
                      {
                        status = REG_ECOLLATE;
                        goto error;
                      }
                    if (*re == CHAR_PERIOD)
                      {
                        if (re + 1 >= re_end)
                          {
                            status = REG_ECOLLATE;
                            goto error;
                          }
                        if (re[1] == CHAR_RBRACKET)
                          {
                            DPRINT(("tre_parse_bracket: collating "
                                    "symbol: '%.*" STRF "'\n",
                                    REST(start - 2)));
                            /* "[..]" is not allowed. */
                            if (re == start)
                              {
                                status = REG_ECOLLATE;
                                goto error;
                              }
#ifdef BSD_COMPATIBILITY
                            /* A name from the cnames table stands for a
                               single character. */
                            c = tre_search_cnames(start, re - start);
                            if (c != (wchar_t)-1)
                              {
                                re++;
                                goto process_single_character;
                              }
#endif
                            if (__collate_equiv_value(ctx->loc, start,
                                                      re - start) <= 0)
                              {
                                status = REG_ECOLLATE;
                                goto error;
                              }
                            /* A one-character symbol is just that
                               character. */
                            if (re - start == 1)
                              {
                                c = *start;
                                re++;
                                goto process_single_character;
                              }
                            /* Multi-character symbols are rejected in
                               negated brackets and as range end points. */
                            if (invert)
                              {
                                status = REG_ECOLLATE;
                                goto error;
                              }
                            if (range > 0)
                              {
                                status = REG_ERANGE;
                                goto error;
                              }
                            range = -1;
#if TRE_DEBUG
                            if (!collect_MCCS)
                              {
                                collect_MCCS = 1;
                                DPRINT(("tre_parse_bracket: Detected MCCS\n"));
                              }
#else
                            collect_MCCS = 1;
#endif
                            /* First symbol: allocate the array.  Slot 0 is
                               the header and one slot is kept for the
                               terminator, hence the "+ 2". */
                            if (!cp)
                              {
                                if ((col_syms = xmalloc(sizeof(*col_syms) *
                                                        (START_COLLATING_SYMBOLS + 2)))
                                    == NULL)
                                  return REG_ESPACE;
                                cp = col_syms + 1;
                                n_col_syms = START_COLLATING_SYMBOLS;
                              }
                            /* Array full: double it. */
                            if ((cp - col_syms) - 1 >= n_col_syms)
                              {
                                int i = n_col_syms;
                                tre_collating_symbol *tmp =
                                  xrealloc(col_syms, sizeof(*col_syms) *
                                           ((n_col_syms *= 2) + 2));
                                if (tmp == NULL)
                                  {
                                    xfree(col_syms);
                                    return REG_ESPACE;
                                  }
                                DPRINT(("tre_list_collating_symbols: "
                                        "Enlarging col_syms to %d\n",
                                        n_col_syms));
                                col_syms = tmp;
                                cp = col_syms + i + 1;
                              }
                            cp->start = start;
                            cp->len = re - start;
                            cp++;
                            re++;
                            break;
                          }
                      }
                  }
                break;
              }

            case CHAR_EQUAL:
            case CHAR_COLON:
              {
                /* Equivalence class "[=x=]" or character class
                   "[:name:]". */
                tre_char_t kind = re[1];

                /* Neither may be a range end point. */
                if (range > 0)
                  {
                    status = REG_ERANGE;
                    goto error;
                  }
                /* Flush the pending single character, if any. */
                if (!collect_MCCS && range == 0)
                  {
                    status = tre_new_item(ctx->mem, TRE_BRACKET_MATCH_TYPE_CHAR,
                                          min, &max_i, items);
                    if (status != REG_OK)
                      goto error;
                  }
                range = -1;
                re += 2;
                start = re;
                for (;; re++)
                  {
                    if (re >= re_end)
                      {
                        status = kind == CHAR_EQUAL ? REG_ECOLLATE : REG_ECTYPE;
                        goto error;
                      }
                    if (*re == kind)
                      {
                        if (re + 1 >= re_end)
                          {
                            status = kind == CHAR_EQUAL ? REG_ECOLLATE :
                                                          REG_ECTYPE;
                            goto error;
                          }
                        if (re[1] == CHAR_RBRACKET)
                          {
                            /* Empty name. */
                            if (re == start)
                              {
                                status = kind == CHAR_EQUAL ? REG_ECOLLATE :
                                                              REG_ECTYPE;
                                goto error;
                              }
                            if (kind == CHAR_EQUAL)
                              {
                                int equiv;
                                DPRINT(("tre_parse_bracket: equivalence: '%.*"
                                        STRF "'\n", REST(start - 2)));
                                if ((equiv = __collate_equiv_value(ctx->loc,
                                             start, re - start)) <= 0)
                                  {
#ifdef BSD_COMPATIBILITY
                                    c = tre_search_cnames(start, re - start);
                                    if (c != (wchar_t)-1)
                                      {
                                        re++;
                                        goto process_single_character;
                                      }
#endif
                                    status = REG_ECOLLATE;
                                    goto error;
                                  }
                                if (!collect_MCCS)
                                  {
                                    status = tre_new_item(ctx->mem,
                                              TRE_BRACKET_MATCH_TYPE_EQUIVALENCE,
                                              equiv, &max_i, items);
                                    if (status != REG_OK)
                                      goto error;
                                  }
                              }
                            else
                              {
                                DPRINT(("tre_parse_bracket: class: '%.*" STRF
                                        "'\n", REST(start - 2)));
                                if (!collect_MCCS)
                                  {
                                    char tmp_str[64];
                                    tre_ctype_t class;
                                    int len = MIN(re - start, 63);
#ifdef TRE_WCHAR
                                    {
                                      tre_char_t tmp_wcs[64];
                                      size_t nbytes = (size_t)len;
                                      wcsncpy(tmp_wcs, start, (size_t)len);
                                      tmp_wcs[len] = L'\0';
#if defined HAVE_WCSRTOMBS
                                      {
                                        mbstate_t state;
                                        const tre_char_t *src = tmp_wcs;
                                        memset(&state, '\0', sizeof(state));
                                        /* Leave room for the terminator
                                           written below. */
                                        nbytes = wcsrtombs_l(tmp_str, &src,
                                                             sizeof(tmp_str) - 1,
                                                             &state, ctx->loc);
                                      }
#elif defined HAVE_WCSTOMBS
                                      nbytes = wcstombs(tmp_str, tmp_wcs, 63);
#endif
                                      /* The conversion reports failure as
                                         (size_t)-1; using that as an index
                                         would write outside tmp_str.  A name
                                         that cannot be converted is not a
                                         valid class name. */
                                      if (nbytes == (size_t)-1)
                                        {
                                          status = REG_ECTYPE;
                                          goto error;
                                        }
                                      len = (int)nbytes;
                                    }
#else
                                    strncpy(tmp_str, (const char*)start, len);
#endif
                                    tmp_str[len] = '\0';
                                    DPRINT(("  class name: %s\n", tmp_str));
                                    class = tre_ctype_l(tmp_str, ctx->loc);
                                    if (!class)
                                      {
                                        status = REG_ECTYPE;
                                        goto error;
                                      }
                                    status = tre_new_item(ctx->mem,
                                              TRE_BRACKET_MATCH_TYPE_CLASS,
                                              class, &max_i, items);
                                    if (status != REG_OK)
                                      goto error;
                                  }
                              }
                            re++;
                            break;
                          }
                      }
                  }
                other++;
                break;
              }

            default:
              /* A '[' not followed by '.', '=' or ':' is an ordinary
                 character. */
              DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
              c = CHAR_LBRACKET;
              goto process_single_character;
              break;
            }
          break;

        case CHAR_RBRACKET:
          /* A ']' in first position is an ordinary character. */
          if (re == ctx->re)
            {
              DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
              min = CHAR_RBRACKET;
              range = 0;
              other++;
              break;
            }
          /* Collating-symbol mode: fill in the header and terminator and
             hand the array to the caller. */
          if (collect_MCCS)
            {
              DPRINT(("tre_parse_bracket: done: '%.*" STRF "'\n",
                      REST(re)));
              if (col_syms)
                {
                  col_syms->start = re + 1;
                  col_syms->len = other;
                  cp->start = NULL;
                }
              *result = col_syms;
              return REG_OK;
            }
          /* Flush the pending single character, if any. */
          if (range == 0)
            {
              status = tre_new_item(ctx->mem, TRE_BRACKET_MATCH_TYPE_CHAR,
                                    min, &max_i, items);
              if (status != REG_OK)
                goto error;
            }
          DPRINT(("tre_parse_bracket: done: '%.*" STRF "'\n", REST(re)));
          *items_size = max_i;
          ctx->re = re + 1;
          return REG_OK;

        default:
          DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
          c = *re;
process_single_character:
          if (range > 0)
            {
              int mine, maxe;
process_end_range:
              /* Range end point: validate against the start point. */
              mine = __collate_equiv_value(ctx->loc, &min, 1);
              maxe = __collate_equiv_value(ctx->loc, &c, 1);
              if (maxe < mine)
                {
                  status = REG_ERANGE;
                  goto error;
                }
              if (!collect_MCCS)
                {
                  status = tre_new_item(ctx->mem,
                                        TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN,
                                        mine, &max_i, items);
                  if (status != REG_OK)
                    goto error;
                  status = tre_new_item(ctx->mem,
                                        TRE_BRACKET_MATCH_TYPE_RANGE_END,
                                        maxe, &max_i, items);
                  if (status != REG_OK)
                    goto error;
                }
              range = -1;
            }
          else
            {
process_begin_range:
              /* Emit the previously pending character; the new one becomes
                 pending since it may turn out to begin a range. */
              if (!collect_MCCS && range == 0)
                {
                  status = tre_new_item(ctx->mem,
                                        TRE_BRACKET_MATCH_TYPE_CHAR,
                                        min, &max_i, items);
                  if (status != REG_OK)
                    goto error;
                }
              /* Track the pending character in collating-symbol mode too:
                 a following "-x" is still validated against `min' above. */
              min = c;
              range = 0;
            }
          other++;
          break;
        }
    }
  /* Ran off the end of the pattern without seeing the closing ']'. */
  status = REG_EBRACK;
error:
  DPRINT(("tre_parse_bracket: error: '%.*" STRF "', status=%d\n",
          REST(re), status));
  if (col_syms)
    xfree(col_syms);
  return status;
}
#ifdef TRE_DEBUG
/* Printable names for bracket match items, indexed by the item's `type'
   field; used only in debug output. */
static const char *bracket_match_type_str[] = {
  [0] = "unused",
  [1] = "char",
  [2] = "range begin",
  [3] = "range end",
  [4] = "class",
  [5] = "equivalence value",
};
#endif
/* Parses a bracket expression.  ctx->re points just after the opening '['.

   Three outcomes on success:
   - "[[:<:]]" / "[[:>:]]" (seen here as "[:<:]]" / "[:>:]]") produce a
     word-boundary assertion literal.
   - When the items contain a multi-character collating symbol, the bracket
     is rewritten as an alternation string ("[rest]|sym1|sym2...", or just
     the symbols when nothing else is present) and parsed recursively with
     tre_parse in extended mode.
   - Otherwise a literal node carrying a copy of the bracket match list is
     created.

   *result receives the new node.  ctx->re is advanced past the bracket
   expression on success.  Returns REG_OK or an error code from item parsing,
   the recursive parse, or REG_ESPACE on allocation failure. */
static reg_errcode_t
tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
{
  tre_ast_node_t *node = NULL;
  reg_errcode_t status = REG_OK;
  tre_bracket_match_list_t *items;
  int max_i = 32;
  tre_collating_symbol *col_syms = NULL;

  /* Special case: begin/end of word assertions. */
  if (ctx->re_end - ctx->re >= 6 && ctx->re[0] == CHAR_LBRACKET
      && ctx->re[1] == CHAR_COLON && (ctx->re[2] == L'<' || ctx->re[2] == L'>')
      && ctx->re[3] == CHAR_COLON && ctx->re[4] == CHAR_RBRACKET
      && ctx->re[5] == CHAR_RBRACKET)
    {
      *result = tre_ast_new_literal(ctx->mem, ASSERTION,
                      (ctx->re[2] == L'<') ? ASSERT_AT_BOW : ASSERT_AT_EOW,
                      -1);
      DPRINT(("tre_parse_bracket: special case %s\n", (ctx->re[2] == L'<') ?
              "[[:<:]]" : "[[:>:]]"));
      ctx->re += 6;
      return *result ? REG_OK : REG_ESPACE;
    }

  items = xcalloc(1, SIZEOF_BRACKET_MATCH_LIST_N(max_i));
  if (items == NULL)
    return REG_ESPACE;

  /* The pattern may end right after the '['; do not read past re_end.
     The item parser reports the unterminated bracket in that case. */
  if (ctx->re < ctx->re_end && *ctx->re == CHAR_CARET)
    {
      DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
      items->flags |= TRE_BRACKET_MATCH_FLAG_NEGATE;
      ctx->re++;
    }

  status = tre_parse_bracket_items(ctx, &items, &max_i, &col_syms);
  if (status != REG_OK)
    goto parse_bracket_done;

  /* Multi-character collating symbols were found: build an equivalent
     alternation and reparse it. */
  if (col_syms)
    {
      tre_char_t *str, *sp;
      tre_collating_symbol *cp;
      tre_parse_ctx_t subctx;

      /* The item list is not used in this mode. */
      xfree(items);
      /* col_syms->start is the position after the closing ']', so this is
         the length of the bracket body plus two. */
      str = xmalloc(sizeof(*str) * ((col_syms->start - ctx->re) + 2));
      if (str == NULL)
        {
          xfree(col_syms);
          return REG_ESPACE;
        }
      sp = str;
      /* col_syms->len counts the items other than collating symbols.  If
         there are any, emit a bracket holding everything except the
         "[.sym.]" parts, followed by '|'. */
      if (col_syms->len > 0)
        {
          const tre_char_t *re;
          ptrdiff_t i;

          *sp++ = '[';
          re = ctx->re;
          for (cp = col_syms + 1; cp->start; cp++)
            {
              /* Text between the previous symbol and this one's "[.". */
              if ((i = ((cp->start - re) - 2)) > 0)
                {
                  memcpy(sp, re, sizeof(*sp) * i);
                  sp += i;
                }
              /* Skip over the symbol and its closing ".]". */
              re = cp->start + cp->len + 2;
            }
          /* Remainder, up to and including the closing ']'. */
          i = col_syms->start - re;
          memcpy(sp, re, sizeof(*sp) * i);
          sp += i;
          *sp++ = '|';
        }
      /* Then the collating symbols themselves, separated by '|'. */
      for (cp = col_syms + 1; cp->start; cp++)
        {
          memcpy(sp, cp->start, sizeof(*sp) * cp->len);
          sp += cp->len;
          if (cp[1].start)
            *sp++ = '|';
        }
      *sp = 0;
      DPRINT(("tre_parse_bracket: Reparsing bracket expression with '%ls'\n",
              str));

      memcpy(&subctx, ctx, sizeof(subctx));
      subctx.re = str;
      subctx.len = sp - str;
      subctx.nofirstsub = 1;
      subctx.cflags |= REG_EXTENDED;
      status = tre_parse(&subctx);
      xfree(str);
      if (status != REG_OK)
        {
          xfree(col_syms);
          return status;
        }
      ctx->re = col_syms->start;
      ctx->position = subctx.position;
      xfree(col_syms);
      *result = subctx.result;
      DPRINT(("tre_parse_bracket: Returning to original string\n"));
      return REG_OK;
    }

  DPRINT(("tre_parse_bracket: creating bracket expression literal\n"));
  node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position);
  if (node == NULL)
    {
      status = REG_ESPACE;
      goto parse_bracket_done;
    }
  else
    {
      /* Copy the item list into memory owned by ctx->mem; the heap copy is
         released below. */
      tre_literal_t *l = node->obj;
      l->u.bracket_match_list = tre_mem_alloc(ctx->mem,
                                         SIZEOF_BRACKET_MATCH_LIST(items));
      if (l->u.bracket_match_list == NULL)
        {
          status = REG_ESPACE;
          goto parse_bracket_done;
        }
      memcpy(l->u.bracket_match_list, items, SIZEOF_BRACKET_MATCH_LIST(items));
    }

#ifdef TRE_DEBUG
  {
    int i;
    tre_bracket_match_t *b;
    DPRINT(("tre_parse_bracket: %d bracket match items, flags 0x%x\n",
            items->num_bracket_matches, items->flags));
    for (i = 0, b = items->bracket_matches;
         i < items->num_bracket_matches; i++, b++)
      {
        DPRINT(("   %d: %s %d\n", i, bracket_match_type_str[b->type],
                b->value));
      }
  }
#endif

parse_bracket_done:
  xfree(items);
  ctx->position++;
  *result = node;
  return status;
}
/* Parses an unsigned decimal integer starting at `*regex' and not reading
   at or beyond `regex_end'.

   On return `*regex' points at the first character that is not a digit.
   Returns the parsed value, or -1 when there is no digit at `*regex'.
   Values too large for an int saturate at INT_MAX instead of overflowing
   (signed overflow is undefined and could wrap to a small or negative value
   that slips past the callers' range checks); all digits are still
   consumed. */
static int
tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
{
  int num = -1;
  const tre_char_t *r = *regex;

  while (r < regex_end && *r >= L'0' && *r <= L'9')
    {
      int digit = (int)(*r - L'0');

      if (num < 0)
        num = 0;
      /* Would num * 10 + digit exceed INT_MAX? */
      if (num > (INT_MAX - digit) / 10)
        num = INT_MAX;
      else
        num = num * 10 + digit;
      r++;
    }
  *regex = r;
  return num;
}
/* Parses a bound (repetition count).  ctx->re points just after the opening
   '{' (or "\{" in basic mode).  Accepts "m", "m," and "m,n" followed by the
   closing '}' (extended mode) or "\}" (basic mode); with TRE_APPROX the
   approximate matching limits and costs are parsed as well.

   ctx:    parse context; ctx->re is advanced past the bound on success only.
   result: in/out node to wrap in an iteration.  When NULL the bound is only
           checked and skipped.

   Returns REG_OK on success.  With ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND,
   REG_NOMATCH is returned in extended mode when the bound does not start
   with a digit (or the pattern ends), which the caller in this file treats
   as "not a bound".  Other errors: REG_EBRACE, REG_BADBR, REG_BADRPT,
   REG_ESPACE. */
static reg_errcode_t
tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
{
  int min, max;
#ifdef TRE_APPROX
  int i;
  int cost_ins, cost_del, cost_subst, cost_max;
  int limit_ins, limit_del, limit_subst, limit_err;
  const tre_char_t *start;
#endif
  const tre_char_t *r = ctx->re;
  int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
#ifdef TRE_APPROX
  int approx = 0;
  int costs_set = 0;
  int counts_set = 0;

  cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
  limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
#endif

  /* Parse the minimum count. */
  min = -1;
  if (r >= ctx->re_end)
#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
    return (ctx->cflags & REG_EXTENDED) ? REG_NOMATCH : REG_EBRACE;
#else
    return REG_EBRACE;
#endif
  if (*r >= L'0' && *r <= L'9') {
    DPRINT(("tre_parse:	  min count: '%.*" STRF "'\n", REST(r)));
    min = tre_parse_int(&r, ctx->re_end);
  }
#ifndef TRE_APPROX
  else
#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
    return (ctx->cflags & REG_EXTENDED) ? REG_NOMATCH : REG_BADBR;
#else
    return REG_BADBR;
#endif
#endif

  /* Parse the optional maximum count; "m," leaves max at -1. */
  max = min;
  if (r < ctx->re_end && *r == CHAR_COMMA)
    {
      r++;
      DPRINT(("tre_parse:   max count: '%.*" STRF "'\n", REST(r)));
      max = tre_parse_int(&r, ctx->re_end);
    }

  /* Check that the repeat counts are sane. */
  if ((max >= 0 && min > max) || min > RE_DUP_MAX || max > RE_DUP_MAX)
    return REG_BADBR;

#ifdef TRE_APPROX
  /* Parse count limits and the cost equation, in either order, until a
     pass consumes nothing. */
  do {
    int done;
    start = r;

    /* Count limits for insertions, deletions, substitutions and errors. */
    done = 0;
    if (!counts_set)
      while (r + 1 < ctx->re_end && !done)
        {
          switch (*r)
            {
            case CHAR_PLUS:
              DPRINT(("tre_parse:   ins limit: '%.*" STRF "'\n", REST(r)));
              r++;
              limit_ins = tre_parse_int(&r, ctx->re_end);
              if (limit_ins < 0)
                limit_ins = INT_MAX;
              counts_set = 1;
              break;
            case CHAR_MINUS:
              DPRINT(("tre_parse:   del limit: '%.*" STRF "'\n", REST(r)));
              r++;
              limit_del = tre_parse_int(&r, ctx->re_end);
              if (limit_del < 0)
                limit_del = INT_MAX;
              counts_set = 1;
              break;
            case CHAR_HASH:
              DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
              r++;
              limit_subst = tre_parse_int(&r, ctx->re_end);
              if (limit_subst < 0)
                limit_subst = INT_MAX;
              counts_set = 1;
              break;
            case CHAR_TILDE:
              DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
              r++;
              limit_err = tre_parse_int(&r, ctx->re_end);
              if (limit_err < 0)
                limit_err = INT_MAX;
              approx = 1;
              break;
            case CHAR_COMMA:
              r++;
              break;
            case L' ':
              r++;
              break;
            case L'}':
              done = 1;
              break;
            default:
              done = 1;
              break;
            }
        }

    /* Cost equation. */
    done = 0;
    if (!costs_set)
      while (r + 1 < ctx->re_end && !done)
        {
          switch (*r)
            {
            case CHAR_PLUS:
            case L' ':
              r++;
              break;
            case L'<':
              DPRINT(("tre_parse:    max cost: '%.*" STRF "'\n", REST(r)));
              r++;
              /* Skip blanks, but never read past the end of the pattern. */
              while (r < ctx->re_end && *r == L' ')
                r++;
              cost_max = tre_parse_int(&r, ctx->re_end);
              if (cost_max < 0)
                cost_max = INT_MAX;
              else
                cost_max--;
              approx = 1;
              break;
            case CHAR_COMMA:
              r++;
              done = 1;
              break;
            default:
              if (*r >= L'0' && *r <= L'9')
                {
#ifdef TRE_DEBUG
                  const tre_char_t *sr = r;
#endif
                  int cost = tre_parse_int(&r, ctx->re_end);
                  /* The digits may run up to the end of the pattern; a cost
                     without its 'i'/'d'/'s' suffix is malformed. */
                  if (r >= ctx->re_end)
                    return REG_BADBR;
                  switch (*r)
                    {
                    case L'i':
                      DPRINT(("tre_parse:    ins cost: '%.*" STRF "'\n",
                              REST(sr)));
                      r++;
                      cost_ins = cost;
                      costs_set = 1;
                      break;
                    case L'd':
                      DPRINT(("tre_parse:    del cost: '%.*" STRF "'\n",
                              REST(sr)));
                      r++;
                      cost_del = cost;
                      costs_set = 1;
                      break;
                    case L's':
                      DPRINT(("tre_parse:  subst cost: '%.*" STRF "'\n",
                              REST(sr)));
                      r++;
                      cost_subst = cost;
                      costs_set = 1;
                      break;
                    default:
                      return REG_BADBR;
                    }
                }
              else
                {
                  done = 1;
                  break;
                }
            }
        }
  } while (start != r);
#endif

  /* Missing closing brace. */
  if (r >= ctx->re_end)
    return REG_EBRACE;

  /* Empty contents. */
  if (r == ctx->re)
    return REG_BADBR;

  /* Parse the ending '}' or "\}", and reject a directly following repeat
     operator. */
  if (ctx->cflags & REG_EXTENDED)
    {
      if (r >= ctx->re_end || *r != CHAR_RBRACE)
        return REG_BADBR;
      r++;
      if (r < ctx->re_end)
        {
          if (*r == CHAR_QUESTIONMARK)
            {
              /* A trailing '?' toggles minimal matching, enhanced mode
                 only. */
              if (ctx->cflags & REG_ENHANCED)
                {
                  minimal = !(ctx->cflags & REG_UNGREEDY);
                  r++;
                }
              else return REG_BADRPT;
            }
          else if (*r == CHAR_STAR || *r == CHAR_PLUS)
            {
              return REG_BADRPT;
            }
        }
    }
  else
    {
      if (r + 1 >= ctx->re_end
          || *r != CHAR_BACKSLASH
          || *(r + 1) != CHAR_RBRACE)
        return REG_BADBR;
      r += 2;
      if (r < ctx->re_end && *r == CHAR_STAR)
        {
          return REG_BADRPT;
        }
    }

  if (minimal)
    ctx->num_reorder_tags++;

  /* No node to wrap: the bound was only checked and skipped. */
  if (!result) goto parse_bound_exit;

  /* Create the AST node. */
#ifdef TRE_APPROX
  /* Only approximate parameters were given: repeat exactly once. */
  if (min < 0 && max < 0)
    min = max = 1;
#endif
  *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
  if (!*result)
    return REG_ESPACE;

#ifdef TRE_APPROX
  /* If approximate matching parameters are set, attach them to the
     iteration node. */
  if (approx || costs_set || counts_set)
    {
      int *params;
      tre_iteration_t *iter = (*result)->obj;

      if (costs_set || counts_set)
        {
          /* An unset limit becomes 0 when the matching cost is unset as
             well, otherwise unlimited. */
          if (limit_ins == TRE_PARAM_UNSET)
            {
              if (cost_ins == TRE_PARAM_UNSET)
                limit_ins = 0;
              else
                limit_ins = INT_MAX;
            }

          if (limit_del == TRE_PARAM_UNSET)
            {
              if (cost_del == TRE_PARAM_UNSET)
                limit_del = 0;
              else
                limit_del = INT_MAX;
            }

          if (limit_subst == TRE_PARAM_UNSET)
            {
              if (cost_subst == TRE_PARAM_UNSET)
                limit_subst = 0;
              else
                limit_subst = INT_MAX;
            }
        }

      if (cost_max == TRE_PARAM_UNSET)
        cost_max = INT_MAX;
      if (limit_err == TRE_PARAM_UNSET)
        limit_err = INT_MAX;

      ctx->have_approx = 1;
      params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
      if (!params)
        return REG_ESPACE;
      for (i = 0; i < TRE_PARAM_LAST; i++)
        params[i] = TRE_PARAM_UNSET;
      params[TRE_PARAM_COST_INS] = cost_ins;
      params[TRE_PARAM_COST_DEL] = cost_del;
      params[TRE_PARAM_COST_SUBST] = cost_subst;
      params[TRE_PARAM_COST_MAX] = cost_max;
      params[TRE_PARAM_MAX_INS] = limit_ins;
      params[TRE_PARAM_MAX_DEL] = limit_del;
      params[TRE_PARAM_MAX_SUBST] = limit_subst;
      params[TRE_PARAM_MAX_ERR] = limit_err;
      iter->params = params;
    }
#endif

parse_bound_exit:
#ifdef TRE_APPROX
  DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
          "limits [%d,%d,%d, total %d]\n",
          min, max, cost_ins, cost_del, cost_subst, cost_max,
          limit_ins, limit_del, limit_subst, limit_err));
#else
  DPRINT(("tre_parse_bound: min %d, max %d\n", min, max));
#endif

  ctx->re = r;
  return REG_OK;
}
/* Symbols that tre_parse pushes on its stack to record which parsing step
   to perform next.  Values are listed explicitly; they are stored on the
   stack as plain ints. */
typedef enum {
  PARSE_RE                = 0,
  PARSE_ATOM              = 1,
  PARSE_MARK_FOR_SUBMATCH = 2,
  PARSE_BRANCH            = 3,
  PARSE_PIECE             = 4,
  PARSE_CATENATION        = 5,
  PARSE_POST_CATENATION   = 6,
  PARSE_UNION             = 7,
  PARSE_POST_UNION        = 8,
  PARSE_POSTFIX           = 9,
} tre_parse_re_stack_symbol_t;
reg_errcode_t
tre_parse(tre_parse_ctx_t *ctx)
{
tre_ast_node_t *result = NULL;
tre_parse_re_stack_symbol_t symbol;
reg_errcode_t status = REG_OK;
tre_stack_t *stack = ctx->stack;
int bottom = tre_stack_num_objects(stack);
int depth = 0;
int temporary_cflags = 0;
int bre_branch_begin;
#ifdef TRE_DEBUG
const tre_char_t *tmp_re;
#endif
DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d cflags = 0%o\n",
ctx->len, ctx->re, ctx->len, ctx->cflags));
if (ctx->len <= 0) return REG_EMPTY;
if (!ctx->nofirstsub)
{
STACK_PUSH(stack, int, ctx->cflags);
STACK_PUSH(stack, int, ctx->submatch_id);
STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
ctx->submatch_id++;
}
STACK_PUSH(stack, int, 0); STACK_PUSH(stack, int, PARSE_RE);
ctx->re_start = ctx->re;
ctx->re_end = ctx->re + ctx->len;
while (tre_stack_num_objects(stack) > bottom)
{
symbol = tre_stack_pop_int(stack);
switch (symbol)
{
case PARSE_RE:
bre_branch_begin = tre_stack_pop_int(stack);
if (
#ifdef REG_LITERAL
!(ctx->cflags & REG_LITERAL) &&
#endif
ctx->cflags & (REG_EXTENDED | REG_ENHANCED))
STACK_PUSHX(stack, int, PARSE_UNION);
STACK_PUSHX(stack, int, bre_branch_begin);
STACK_PUSHX(stack, int, PARSE_BRANCH);
break;
case PARSE_BRANCH:
bre_branch_begin = tre_stack_pop_int(stack);
STACK_PUSHX(stack, int, PARSE_CATENATION);
STACK_PUSHX(stack, int, bre_branch_begin);
STACK_PUSHX(stack, int, PARSE_PIECE);
break;
case PARSE_PIECE:
bre_branch_begin = tre_stack_pop_int(stack);
STACK_PUSHX(stack, int, PARSE_POSTFIX);
STACK_PUSHX(stack, int, bre_branch_begin);
STACK_PUSHX(stack, int, PARSE_ATOM);
break;
case PARSE_CATENATION:
{
tre_char_t c;
if (ctx->re >= ctx->re_end)
break;
c = *ctx->re;
#ifdef REG_LITERAL
if (!(ctx->cflags & REG_LITERAL))
{
#endif
if ((ctx->cflags & REG_EXTENDED && c == CHAR_PIPE) ||
((ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) == REG_ENHANCED
&& ctx->re + 1 < ctx->re_end && c == CHAR_BACKSLASH &&
*(ctx->re + 1) == CHAR_PIPE))
break;
if ((ctx->cflags & REG_EXTENDED
&& c == CHAR_RPAREN && depth > 0)
|| (!(ctx->cflags & REG_EXTENDED)
&& ctx->re + 1 < ctx->re_end && c == CHAR_BACKSLASH
&& *(ctx->re + 1) == CHAR_RPAREN))
{
if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
return REG_EPAREN;
DPRINT(("tre_parse: group end: '%.*" STRF "'\n",
REST(ctx->re)));
depth--;
if (!(ctx->cflags & (REG_EXTENDED | REG_ENHANCED)))
ctx->re += 2;
break;
}
#ifdef REG_LITERAL
}
#endif
#ifdef REG_LEFT_ASSOC
if (ctx->cflags & REG_LEFT_ASSOC)
{
STACK_PUSHX(stack, int, PARSE_CATENATION);
STACK_PUSHX(stack, voidptr, result);
STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
STACK_PUSHX(stack, int, 0); STACK_PUSHX(stack, int, PARSE_PIECE);
}
else
#endif
{
STACK_PUSHX(stack, voidptr, result);
STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
STACK_PUSHX(stack, int, PARSE_CATENATION);
STACK_PUSHX(stack, int, 0); STACK_PUSHX(stack, int, PARSE_PIECE);
}
break;
}
case PARSE_POST_CATENATION:
{
tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
tre_ast_node_t *tmp_node;
tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
if (!tmp_node)
return REG_ESPACE;
result = tmp_node;
break;
}
case PARSE_UNION:
if (ctx->re >= ctx->re_end)
break;
#ifdef REG_LITERAL
if (ctx->cflags & REG_LITERAL)
break;
#endif
if (!(ctx->cflags & REG_EXTENDED))
{
if (*ctx->re != CHAR_BACKSLASH || ctx->re + 1 >= ctx->re_end)
break;
ctx->re++;
}
switch (*ctx->re)
{
case CHAR_PIPE:
DPRINT(("tre_parse: union: '%.*" STRF "'\n",
REST(ctx->re)));
STACK_PUSHX(stack, int, PARSE_UNION);
STACK_PUSHX(stack, voidptr, (void *)ctx->re);
STACK_PUSHX(stack, voidptr, result);
STACK_PUSHX(stack, int, PARSE_POST_UNION);
STACK_PUSHX(stack, int, (ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) == REG_ENHANCED); STACK_PUSHX(stack, int, PARSE_BRANCH);
ctx->re++;
break;
case CHAR_RPAREN:
ctx->re++;
break;
default:
if (!(ctx->cflags & REG_EXTENDED))
ctx->re--;
break;
}
break;
case PARSE_POST_UNION:
{
tre_ast_node_t *tmp_node;
tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
const tre_char_t *pipechar = tre_stack_pop_voidptr(stack);
if (pipechar == ctx->re - 1)
{
return REG_EMPTY;
}
tmp_node = tre_ast_new_union(ctx->mem, tree, result);
if (!tmp_node)
return REG_ESPACE;
result = tmp_node;
break;
}
case PARSE_POSTFIX:
if (ctx->re >= ctx->re_end)
break;
#ifdef REG_LITERAL
if (ctx->cflags & REG_LITERAL)
break;
#endif
int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
int rep_min = 0;
int rep_max = -1;
#ifdef TRE_DEBUG
int lbrace_off;
#endif
switch (*ctx->re)
{
case CHAR_PLUS:
case CHAR_QUESTIONMARK:
if (!(ctx->cflags & REG_EXTENDED))
break;
case CHAR_STAR:
{
tre_ast_node_t *tmp_node;
#ifdef TRE_DEBUG
const char *tstr = "star";
tmp_re = ctx->re;
#endif
handle_plus_or_question:
if (result->type == LITERAL && result->submatch_id < 0 &&
IS_ASSERTION((tre_literal_t *)result->obj))
{
if (!(ctx->cflags & REG_EXTENDED)) break;
return REG_BADRPT;
}
if (*ctx->re == CHAR_PLUS)
{
rep_min = 1;
#ifdef TRE_DEBUG
tstr = "plus";
#endif
}
if (*ctx->re == CHAR_QUESTIONMARK)
{
rep_max = 1;
#ifdef TRE_DEBUG
tstr = "questionmark";
#endif
}
if (ctx->cflags & REG_EXTENDED)
{
if (ctx->re + 1 < ctx->re_end)
{
if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
{
if (ctx->cflags & REG_ENHANCED)
{
minimal = !(ctx->cflags & REG_UNGREEDY);
ctx->re++;
}
else return REG_BADRPT;
}
else if (*(ctx->re + 1) == CHAR_STAR
|| *(ctx->re + 1) == CHAR_PLUS)
{
return REG_BADRPT;
}
}
}
else
{
if (ctx->re + 1 < ctx->re_end && *(ctx->re + 1) == CHAR_STAR)
{
return REG_BADRPT;
}
if (ctx->re + 2 < ctx->re_end)
{
if (*(ctx->re + 1) == CHAR_BACKSLASH && *(ctx->re + 1) == CHAR_QUESTIONMARK)
{
if (ctx->cflags & REG_ENHANCED)
{
minimal = !(ctx->cflags & REG_UNGREEDY);
ctx->re += 2;
}
}
else if (*(ctx->re + 1) == CHAR_BACKSLASH && *(ctx->re + 2) == CHAR_PLUS)
{
return REG_BADRPT;
}
}
}
if (minimal)
ctx->num_reorder_tags++;
DPRINT(("tre_parse: %s %s: '%.*" STRF "'\n",
minimal ? " minimal" : "greedy", tstr, REST(tmp_re)));
if (result == NULL)
{
if (ctx->cflags & REG_EXTENDED) return REG_BADRPT;
else goto parse_literal;
}
ctx->re++;
tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
minimal);
if (tmp_node == NULL)
return REG_ESPACE;
result = tmp_node;
result->submatch_id = ctx->submatch_id_invisible++;
#if 0
STACK_PUSHX(stack, int, PARSE_POSTFIX);
#endif
}
break;
case CHAR_BACKSLASH:
if (!(ctx->cflags & REG_EXTENDED)
&& ctx->re + 1 < ctx->re_end)
{
switch (*(ctx->re + 1))
{
case CHAR_LBRACE:
ctx->re++;
#ifdef TRE_DEBUG
lbrace_off = 2;
#endif
goto parse_brace;
case CHAR_PLUS:
case CHAR_QUESTIONMARK:
if (ctx->cflags & REG_ENHANCED)
{
#ifdef TRE_DEBUG
tmp_re = ctx->re;
#endif
ctx->re++;
goto handle_plus_or_question;
}
break;
}
break;
}
else
break;
case CHAR_LBRACE:
{
int raw_assertion;
if (!(ctx->cflags & REG_EXTENDED))
break;
#ifdef TRE_DEBUG
lbrace_off = 1;
#endif
parse_brace:
raw_assertion = (result->type == LITERAL
&& result->submatch_id < 0
&& IS_ASSERTION((tre_literal_t *)result->obj));
ctx->re++;
status = tre_parse_bound(ctx, &result);
#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
if (status == REG_NOMATCH)
{
ctx->re--;
break;
}
#endif
DPRINT(("tre_parse: bound: '%.*" STRF "'\n",
REST(ctx->re - lbrace_off)));
if (status != REG_OK)
return status;
if (raw_assertion) return REG_BADRPT;
if (result->type == ITERATION)
result->submatch_id = ctx->submatch_id_invisible++;
#if 0
STACK_PUSHX(stack, int, PARSE_POSTFIX);
#endif
break;
}
}
break;
case PARSE_ATOM:
{
bre_branch_begin = tre_stack_pop_int(stack);
if (ctx->re >= ctx->re_end)
goto parse_literal;
#ifdef REG_LITERAL
if (ctx->cflags & REG_LITERAL)
goto parse_literal;
#endif
switch (*ctx->re)
{
case CHAR_LPAREN:
if ((ctx->cflags & (REG_EXTENDED|REG_ENHANCED)) ==
(REG_EXTENDED|REG_ENHANCED)
&& *(ctx->re + 1) == CHAR_QUESTIONMARK)
{
int new_cflags = ctx->cflags;
int bit = 1;
int invisible_submatch = 0;
DPRINT(("tre_parse: extension: '%.*" STRF "'\n",
REST(ctx->re)));
ctx->re += 2;
while (1)
{
if (*ctx->re == L'i')
{
DPRINT(("tre_parse: icase: '%.*" STRF "'\n",
REST(ctx->re)));
if (bit)
new_cflags |= REG_ICASE;
else
new_cflags &= ~REG_ICASE;
ctx->re++;
}
else if (*ctx->re == L'n')
{
DPRINT(("tre_parse: newline: '%.*" STRF "'\n",
REST(ctx->re)));
if (bit)
new_cflags |= REG_NEWLINE;
else
new_cflags &= ~REG_NEWLINE;
ctx->re++;
}
#ifdef REG_LEFT_ASSOC
else if (*ctx->re == L'l')
{
DPRINT(("tre_parse: left assoc: '%.*" STRF "'\n",
REST(ctx->re)));
if (bit)
new_cflags |= REG_LEFT_ASSOC;
else
new_cflags &= ~REG_LEFT_ASSOC;
ctx->re++;
}
#endif
#ifdef REG_UNGREEDY
else if (*ctx->re == L'U')
{
DPRINT(("tre_parse: ungreedy: '%.*" STRF "'\n",
REST(ctx->re)));
if (bit)
new_cflags |= REG_UNGREEDY;
else
new_cflags &= ~REG_UNGREEDY;
ctx->re++;
}
#endif
else if (*ctx->re == CHAR_MINUS)
{
DPRINT(("tre_parse: turn off: '%.*" STRF "'\n",
REST(ctx->re)));
ctx->re++;
bit = 0;
}
else if (*ctx->re == CHAR_COLON)
{
DPRINT(("tre_parse: no group: '%.*" STRF
"', (invisible submatch %d)\n",
REST(ctx->re), ctx->submatch_id_invisible));
ctx->re++;
depth++;
invisible_submatch = 1;
break;
}
else if (*ctx->re == CHAR_HASH)
{
DPRINT(("tre_parse: comment: '%.*" STRF "'\n",
REST(ctx->re)));
while (*ctx->re != CHAR_RPAREN
&& ctx->re < ctx->re_end)
ctx->re++;
if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
{
ctx->re++;
break;
}
else
return REG_BADPAT;
}
else if (*ctx->re == CHAR_RPAREN)
{
ctx->re++;
break;
}
else
return REG_BADRPT;
}
if (invisible_submatch)
{
STACK_PUSHX(stack, int, ctx->cflags);
STACK_PUSHX(stack, int, ctx->submatch_id_invisible);
STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
ctx->submatch_id_invisible++;
STACK_PUSHX(stack, int, 0); STACK_PUSHX(stack, int, PARSE_RE);
}
else {
STACK_PUSHX(stack, int, 0); STACK_PUSHX(stack, int, PARSE_ATOM);
}
ctx->cflags = new_cflags;
break;
}
if (ctx->cflags & REG_EXTENDED)
{
parse_bre_lparen:
DPRINT(("tre_parse: group begin: '%.*" STRF
"', submatch %d\n", REST(ctx->re),
ctx->submatch_id));
ctx->re++;
STACK_PUSHX(stack, int, ctx->cflags);
STACK_PUSHX(stack, int, ctx->submatch_id);
STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
STACK_PUSHX(stack, int, !(ctx->cflags & REG_EXTENDED));
STACK_PUSHX(stack, int, PARSE_RE);
ctx->submatch_id++;
depth++;
}
else
goto parse_literal;
break;
case CHAR_RPAREN:
if (ctx->cflags & REG_EXTENDED && depth > 0)
{
parse_bre_rparen_empty:
if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
return REG_EPAREN;
DPRINT(("tre_parse: empty: '%.*" STRF "'\n",
REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (result == NULL)
return REG_ESPACE;
if (!(ctx->cflags & REG_EXTENDED))
ctx->re--;
}
else
goto parse_literal;
break;
case CHAR_LBRACKET:
DPRINT(("tre_parse: bracket: '%.*" STRF "'\n",
REST(ctx->re)));
ctx->re++;
status = tre_parse_bracket(ctx, &result);
if (status != REG_OK)
return status;
break;
case CHAR_BACKSLASH:
if (!(ctx->cflags & REG_EXTENDED)
&& ctx->re + 1 < ctx->re_end)
{
if (*(ctx->re + 1) == CHAR_LPAREN)
{
ctx->re++;
goto parse_bre_lparen;
}
else if (*(ctx->re + 1) == CHAR_RPAREN)
{
ctx->re++;
goto parse_bre_rparen_empty;
}
if (*(ctx->re + 1) == CHAR_LBRACE) goto parse_literal;
}
if (ctx->re + 1 >= ctx->re_end)
return REG_EESCAPE;
if (!(ctx->cflags & REG_ENHANCED))
{
DPRINT(("tre_parse: unenhanced bleep: '%.*" STRF "'\n", REST(ctx->re)));
ctx->re++;
goto unenhanced_backslash;
}
{
tre_char_t buf[64];
tre_expand_macro(ctx->re + 1, ctx->re_end,
buf, elementsof(buf));
if (buf[0] != 0)
{
tre_parse_ctx_t subctx;
memcpy(&subctx, ctx, sizeof(subctx));
subctx.re = buf;
subctx.len = tre_strlen(buf);
subctx.nofirstsub = 1;
status = tre_parse(&subctx);
if (status != REG_OK)
return status;
ctx->re += 2;
ctx->position = subctx.position;
result = subctx.result;
break;
}
}
#ifdef REG_LITERAL
if (*(ctx->re + 1) == L'Q')
{
DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
REST(ctx->re)));
ctx->cflags |= REG_LITERAL;
temporary_cflags |= REG_LITERAL;
ctx->re += 2;
STACK_PUSHX(stack, int, 0);
STACK_PUSHX(stack, int, PARSE_ATOM);
break;
}
#endif
DPRINT(("tre_parse: bleep: '%.*" STRF "'\n", REST(ctx->re)));
ctx->re++;
switch (*ctx->re)
{
case L'b':
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_WB, -1);
ctx->re++;
break;
case L'B':
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_WB_NEG, -1);
ctx->re++;
break;
case L'<':
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_BOW, -1);
ctx->re++;
break;
case L'>':
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_EOW, -1);
ctx->re++;
break;
case L'x':
ctx->re++;
if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
{
char tmp[3] = {0, 0, 0};
long val;
DPRINT(("tre_parse: 8 bit hex: '%.*" STRF "'\n",
REST(ctx->re - 2)));
if (tre_isxdigit_l(ctx->re[0], ctx->loc) &&
ctx->re < ctx->re_end)
{
tmp[0] = (char)ctx->re[0];
ctx->re++;
}
if (tre_isxdigit_l(ctx->re[0], ctx->loc) &&
ctx->re < ctx->re_end)
{
tmp[1] = (char)ctx->re[0];
ctx->re++;
}
val = strtol(tmp, NULL, 16);
result = tre_ast_new_literal(ctx->mem, (int)val,
(int)val, ctx->position);
ctx->position++;
break;
}
else if (ctx->re < ctx->re_end)
{
char tmp[32];
long val;
int i = 0;
ctx->re++;
while (ctx->re_end - ctx->re >= 0)
{
if (i == sizeof(tmp))
return REG_EBRACE;
if (ctx->re[0] == CHAR_RBRACE)
break;
if (tre_isxdigit_l(ctx->re[0], ctx->loc))
{
tmp[i] = (char)ctx->re[0];
i++;
ctx->re++;
continue;
}
return REG_EBRACE;
}
ctx->re++;
tmp[i] = 0;
val = strtol(tmp, NULL, 16);
result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
ctx->position);
ctx->position++;
break;
}
default:
unenhanced_backslash:
if ((ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) !=
REG_EXTENDED &&
tre_isdigit_l(*ctx->re, ctx->loc) && *ctx->re != L'0')
{
int val = *ctx->re - L'0';
DPRINT(("tre_parse: backref: '%.*" STRF "'\n",
REST(ctx->re - 1)));
result = tre_ast_new_literal(ctx->mem, BACKREF, val,
ctx->position);
if (result == NULL)
return REG_ESPACE;
result->submatch_id = ctx->submatch_id_invisible++;
ctx->position++;
ctx->num_reorder_tags++;
ctx->max_backref = MAX(val, ctx->max_backref);
ctx->re++;
}
else
{
DPRINT(("tre_parse: escaped: '%.*" STRF "'\n",
REST(ctx->re - 1)));
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
ctx->position);
ctx->position++;
ctx->re++;
}
break;
}
if (result == NULL)
return REG_ESPACE;
break;
case CHAR_PERIOD:
DPRINT(("tre_parse: any: '%.*" STRF "'\n",
REST(ctx->re)));
if (ctx->cflags & REG_NEWLINE)
{
tre_ast_node_t *tmp1;
tre_ast_node_t *tmp2;
tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1,
ctx->position);
if (!tmp1)
return REG_ESPACE;
tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX,
ctx->position + 1);
if (!tmp2)
return REG_ESPACE;
result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
if (!result)
return REG_ESPACE;
ctx->position += 2;
}
else
{
result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
ctx->position);
if (!result)
return REG_ESPACE;
ctx->position++;
}
ctx->re++;
break;
case CHAR_CARET:
if (ctx->cflags & REG_EXTENDED
|| bre_branch_begin
|| ctx->re == ctx->re_start)
{
DPRINT(("tre_parse: BOL: '%.*" STRF "'\n",
REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_BOL, -1);
if (result == NULL)
return REG_ESPACE;
ctx->re++;
}
else
goto parse_literal;
break;
case CHAR_DOLLAR:
if (ctx->cflags & REG_EXTENDED
|| (ctx->re + 2 < ctx->re_end
&& *(ctx->re + 1) == CHAR_BACKSLASH
&& *(ctx->re + 2) == CHAR_RPAREN)
|| ctx->re + 1 == ctx->re_end)
{
DPRINT(("tre_parse: EOL: '%.*" STRF "'\n",
REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_EOL, -1);
if (result == NULL)
return REG_ESPACE;
ctx->re++;
}
else
goto parse_literal;
break;
default:
parse_literal:
if (temporary_cflags && ctx->re + 1 < ctx->re_end
&& *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E')
{
DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n",
REST(ctx->re)));
ctx->cflags &= ~temporary_cflags;
temporary_cflags = 0;
ctx->re += 2;
if (ctx->re < ctx->re_end)
{
STACK_PUSHX(stack, int, 0);
STACK_PUSHX(stack, int, PARSE_ATOM);
}
else
{
result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (!result) return REG_ESPACE;
}
break;
}
#ifdef REG_LITERAL
if (!(ctx->cflags & REG_LITERAL))
{
#endif
if (ctx->re >= ctx->re_end) return depth > 0 ? REG_EPAREN
: REG_EMPTY;
if (ctx->cflags & REG_EXTENDED)
{
if (ctx->re < ctx->re_end)
{
if (*ctx->re == CHAR_PIPE) return REG_EMPTY;
if (*ctx->re == CHAR_LBRACE)
{
ctx->re++;
empty_parse_bound:
status = tre_parse_bound(ctx, NULL);
#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
if (status == REG_NOMATCH)
{
ctx->re--;
}
else
{
#endif
if (status != REG_OK)
return status;
return REG_BADRPT;
#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
}
#endif
}
#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
else
#endif
if (*ctx->re == CHAR_STAR
|| *ctx->re == CHAR_PLUS
|| *ctx->re == CHAR_QUESTIONMARK)
{
return REG_BADRPT;
}
}
}
else if (ctx->re + 1 < ctx->re_end
&& *ctx->re == CHAR_BACKSLASH
&& *(ctx->re + 1) == CHAR_LBRACE)
{
ctx->re += 2;
goto empty_parse_bound;
}
#ifdef REG_LITERAL
}
#endif
DPRINT(("tre_parse: literal: '%.*" STRF "'\n",
REST(ctx->re)));
if (ctx->cflags & REG_ICASE
&& (tre_isupper_l(*ctx->re, ctx->loc) ||
tre_islower_l(*ctx->re, ctx->loc)))
{
tre_ast_node_t *tmp1;
tre_ast_node_t *tmp2;
tmp1 = tre_ast_new_literal(ctx->mem,
tre_toupper_l(*ctx->re, ctx->loc),
tre_toupper_l(*ctx->re, ctx->loc),
ctx->position);
if (!tmp1)
return REG_ESPACE;
tmp2 = tre_ast_new_literal(ctx->mem,
tre_tolower_l(*ctx->re, ctx->loc),
tre_tolower_l(*ctx->re, ctx->loc),
ctx->position);
if (!tmp2)
return REG_ESPACE;
result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
if (!result)
return REG_ESPACE;
}
else
{
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
ctx->position);
if (!result)
return REG_ESPACE;
}
ctx->position++;
ctx->re++;
break;
}
break;
}
case PARSE_MARK_FOR_SUBMATCH:
{
int submatch_id = tre_stack_pop_int(stack);
ctx->cflags = tre_stack_pop_int(stack);
if (result->submatch_id >= 0 &&
result->submatch_id < SUBMATCH_ID_INVISIBLE_START)
{
tre_ast_node_t *n, *tmp_node;
if (submatch_id >= SUBMATCH_ID_INVISIBLE_START)
break;
n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (n == NULL)
return REG_ESPACE;
tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
if (tmp_node == NULL)
return REG_ESPACE;
tmp_node->num_submatches = result->num_submatches;
result = tmp_node;
}
result->submatch_id = submatch_id;
if (submatch_id < SUBMATCH_ID_INVISIBLE_START)
result->num_submatches++;
break;
}
default:
assert(0);
break;
}
}
if (depth > 0)
return REG_EPAREN;
ctx->result = result;
return REG_OK;
}