#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <limits.h>
#include "tre-internal.h"
#include "tre-mem.h"
#include "tre-stack.h"
#include "tre-ast.h"
#include "tre-parse.h"
#include "tre-compile.h"
#include "tre.h"
#include "tre-last-matched.h"
#include "xmalloc.h"
#undef bit_ffs
#define bit_ffs(name, nbits, value) { \
register bitstr_t *_name = name; \
register int _byte, _nbits = nbits; \
register int _stopbyte = _bit_byte(_nbits), _value = -1; \
for (_byte = 0; _byte <= _stopbyte; ++_byte) \
if (_name[_byte]) { \
_value = _byte << 3; \
for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
++_value, _stopbyte >>= 1); \
break; \
} \
*(value) = _value; \
}
#ifdef TRE_DEBUG
static const char *tag_dir_str[] = {
"minimize",
"maximize",
"left-maximize"
};
static const char _indent[] = " ";
static void
print_indent(int indent)
{
while (indent-- > 0)
DPRINT((_indent));
}
static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
int num_tags);
static void
print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
int num_tags)
{
tre_last_matched_pre_t *u = branch->last_matched;
int n_last_matched = 0;
while (u)
{
n_last_matched++;
u = u->next;
}
print_indent(indent);
DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
print_indent(indent);
DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
n_last_matched));
if (branch->n_last_matched != n_last_matched)
DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
if (branch->cmp_tag > 0)
{
int i;
const char *sep = " tags=";
print_indent(indent);
DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
for (i = 0; i < num_tags; i++)
if (bit_test(branch->tags, i))
{
DPRINT(("%s%d", sep, i));
sep = ",";
}
DPRINT(("\n"));
}
u = branch->last_matched;
indent++;
while (u)
{
print_last_matched_pre(u, indent, num_tags);
u = u->next;
}
}
static void
print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
{
tre_last_matched_branch_pre_t *b = lm->branches;
int n_branches = 0;
while (b)
{
n_branches++;
b = b->next;
}
print_indent(indent);
DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
print_indent(indent);
DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
lm->n_branches, n_branches));
if (lm->n_branches != n_branches)
DPRINT(("*** mismatch between n and branches ***\n"));
b = lm->branches;
indent++;
while (b)
{
print_last_match_branch_pre(b, indent, num_tags);
b = b->next;
}
}
static void print_last_matched(tre_last_matched_t *lm, int indent);
static void
print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
{
tre_last_matched_t *u;
int i;
print_indent(indent);
DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
if (branch->cmp_tag > 0)
{
print_indent(indent);
DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
if (branch->n_tags > 0)
{
const char *sep = " tags=";
for (i = 0; i < branch->n_tags; i++)
{
DPRINT(("%s%d", sep, branch->tags[i]));
sep = ",";
}
}
DPRINT(("\n"));
}
u = branch->last_matched;
indent++;
for (i = branch->n_last_matched; i > 0; i--, u++)
print_last_matched(u, indent);
}
static void
print_last_matched(tre_last_matched_t *lm, int indent)
{
int i;
tre_last_matched_branch_t *b;
print_indent(indent);
DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
lm->start_tag));
b = lm->branches;
indent++;
for (i = lm->n_branches; i > 0; i--, b++)
print_last_match_branch(b, indent);
}
#endif
static reg_errcode_t
tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
int tag_id, int num_tags)
{
tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
if (db)
{
if (sb)
{
bitstr_t *l = db->tags;
bitstr_t *r = sb->tags;
int i = bitstr_size(num_tags);
while(i-- > 0)
*l++ |= *r++;
db->n_tags += sb->n_tags;
db->tot_tags += sb->tot_tags;
if (db->last_matched)
{
if (sb->last_matched)
{
tre_last_matched_pre_t *u = db->last_matched;
while(u->next)
u = u->next;
u->next = sb->last_matched;
db->n_last_matched += sb->n_last_matched;
db->tot_branches += sb->tot_branches;
db->tot_last_matched += sb->tot_last_matched;
}
}
else if (sb->last_matched)
{
db->last_matched = sb->last_matched;
db->n_last_matched = sb->n_last_matched;
db->tot_branches = sb->tot_branches;
db->tot_last_matched = sb->tot_last_matched;
}
}
}
else
db = sb;
if (tag_id != 0)
{
if (!db)
{
db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
+ bitstr_size(num_tags));
if (db == NULL)
return REG_ESPACE;
db->tot_branches = 1;
}
if (tag_id > 0)
{
bit_set(db->tags, tag_id);
db->n_tags++;
db->tot_tags++;
}
}
dst->last_matched_branch = db;
return REG_OK;
}
static reg_errcode_t
tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
{
tre_catenation_t *c;
DPRINT(("add_tag_left: tag %d\n", tag_id));
c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL)
return REG_ESPACE;
c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
if (c->left == NULL)
return REG_ESPACE;
c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
if (c->right == NULL)
return REG_ESPACE;
c->right->obj = node->obj;
c->right->type = node->type;
c->right->last_matched_branch = node->last_matched_branch;
c->right->nullable = -1;
c->right->submatch_id = -1;
node->obj = c;
node->type = CATENATION;
node->original = c->right;
return REG_OK;
}
static reg_errcode_t
tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
{
tre_catenation_t *c;
DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL)
return REG_ESPACE;
c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
if (c->right == NULL)
return REG_ESPACE;
c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
if (c->left == NULL)
return REG_ESPACE;
c->left->obj = node->obj;
c->left->type = node->type;
c->left->last_matched_branch = node->last_matched_branch;
c->left->nullable = -1;
c->left->submatch_id = -1;
node->obj = c;
node->type = CATENATION;
node->original = c->left;
return REG_OK;
}
typedef enum {
ADDTAGS_RECURSE,
ADDTAGS_RECURSE_NOT_TOP_UNION,
ADDTAGS_AFTER_ITERATION,
ADDTAGS_AFTER_UNION_LEFT,
ADDTAGS_AFTER_UNION_RIGHT,
ADDTAGS_AFTER_CAT_LEFT,
ADDTAGS_AFTER_CAT_RIGHT,
ADDTAGS_SET_SUBMATCH_END,
ADDTAGS_UNION_RECURSE,
ADDTAGS_UNION_RIGHT_RECURSE,
ADDTAGS_AFTER_UNION_TOP,
} tre_addtags_symbol_t;
enum {
COPY_LAST_MATCHED_BRANCH,
COPY_LAST_MATCHED_BRANCH_NEXT,
COPY_LAST_MATCHED,
COPY_LAST_MATCHED_NEXT,
};
#define REGSET_UNSET ((unsigned)-1)
static void
tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
{
int i;
for (i = 0; regset[i] != REGSET_UNSET; i++)
{
int id = regset[i] / 2;
int start = !(regset[i] % 2);
if (id >= SUBMATCH_ID_INVISIBLE_START)
continue;
DPRINT((" Using tag %d for %s offset of "
"submatch %d\n", tag,
start ? "start" : "end", id));
if (start)
tnfa->submatch_data[id].so_tag = tag;
else
tnfa->submatch_data[id].eo_tag = tag;
}
regset[0] = -1;
}
#define REGSET_HAS_STARTS 0x1
#define REGSET_HAS_ENDS 0x2
static reg_errcode_t
tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
tre_tnfa_t *tnfa)
{
reg_errcode_t status = REG_OK;
tre_addtags_symbol_t symbol;
tre_ast_node_t *node = tree;
int bottom = tre_stack_num_objects(stack);
int first_pass = (mem == NULL || tnfa == NULL);
unsigned *regset, *orig_regset;
int regset_contains = 0;
int num_tags = 0;
int num_minimals = 0;
int tag = 0;
int next_tag = 1;
int minimal_tag = -1;
int *reorder_tags = NULL;
int *rtp;
int *to_reorder;
int id;
tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
if (!first_pass)
{
DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
tnfa->end_tag = 0;
tnfa->minimal_tags[0] = -1;
}
regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
+ tnfa->num_submatches_invisible + 1) * 2));
if (regset == NULL)
{
status = REG_ESPACE;
goto error_regset;
}
regset[0] = REGSET_UNSET;
orig_regset = regset;
if (!first_pass)
{
rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
((2 * tnfa->num_reorder_tags + 1) +
tnfa->num_tags));
if (reorder_tags == NULL)
{
status = REG_ESPACE;
goto error_reorder_tags;
}
to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
}
STACK_PUSH(stack, voidptr, node);
STACK_PUSH(stack, int, ADDTAGS_RECURSE);
while (tre_stack_num_objects(stack) > bottom)
{
if (status != REG_OK)
break;
symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
switch (symbol)
{
int top_union;
case ADDTAGS_SET_SUBMATCH_END:
{
int i;
id = tre_stack_pop_int(stack);
node = tre_stack_pop_voidptr(stack);
for (i = 0; regset[i] != REGSET_UNSET; i++);
regset[i] = id * 2 + 1;
regset[i + 1] = -1;
regset_contains |= REGSET_HAS_ENDS;
if (minimal_tag >= 0)
{
if (first_pass)
{
node->num_tags++;
DPRINT((" ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
node->num_tags));
}
else
{
int i;
status = tre_merge_branches(mem, node, NULL, tag,
tnfa->num_tags);
if (status != REG_OK)
break;
status = tre_add_tag_right(mem, node, tag);
if (status != REG_OK)
break;
tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
DPRINT(("Setting t%d direction to %s\n", tag,
tag_dir_str[tnfa->tag_directions[tag]]));
DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
tnfa->minimal_tags[i] = tag;
tnfa->minimal_tags[i + 1] = minimal_tag;
tnfa->minimal_tags[i + 2] = -1;
DPRINT((" Minimal end: t%d reordered to "
"after t%d\n", tag, minimal_tag));
*rtp++ = tag;
*rtp++ = minimal_tag;
num_minimals++;
tre_purge_regset(regset, tnfa, tag);
}
minimal_tag = -1;
DPRINT((" ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
regset[0] = REGSET_UNSET;
regset_contains = 0;
tag = next_tag;
num_tags++;
next_tag++;
}
break;
}
case ADDTAGS_RECURSE_NOT_TOP_UNION:
top_union = 0;
goto do_addtags_recurse;
case ADDTAGS_RECURSE:
top_union = 1;
do_addtags_recurse:
node = tre_stack_pop_voidptr(stack);
id = node->submatch_id;
if (id >= 0)
{
int i;
for (i = 0; regset[i] != REGSET_UNSET; i++);
regset[i] = id * 2;
regset[i + 1] = -1;
regset_contains |= REGSET_HAS_STARTS;
STACK_PUSH(stack, voidptr, node);
STACK_PUSHX(stack, int, id);
STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
}
switch (node->type)
{
case LITERAL:
{
tre_literal_t *lit = node->obj;
if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
{
DPRINT(("Literal %d-%d\n",
(int)lit->code_min, (int)lit->code_max));
if (regset_contains)
{
if (first_pass)
{
DPRINT((" ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
node->num_tags = 1;
}
else
{
status = tre_merge_branches(mem, node, NULL, tag,
tnfa->num_tags);
if (status != REG_OK)
break;
status = tre_add_tag_left(mem, node, tag);
if (status != REG_OK)
break;
if (regset_contains == REGSET_HAS_STARTS)
tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
else
tnfa->tag_directions[tag] = direction;
DPRINT(("Setting t%d direction to %s\n", tag,
tag_dir_str[tnfa->tag_directions[tag]]));
tre_purge_regset(regset, tnfa, tag);
if (IS_BACKREF(lit))
{
int b = lit->code_max;
int t = tnfa->submatch_data[b].so_tag;
if (tnfa->submatch_data[b].eo_tag < 0)
{
status = REG_ESUBREG;
break;
}
if (t < tag)
{
DPRINT((" Backref %d start: "
"t%d reordered to before t%d\n",
b, tag, t));
if(t > 0)
t--;
*rtp++ = tag;
*rtp++ = t;
}
#if TRE_DEBUG
else
DPRINT((" Backref %d start: "
"(t%d already before t%d)\n",
b, tag, t));
#endif
}
}
DPRINT((" ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
tag));
regset[0] = REGSET_UNSET;
regset_contains = 0;
tag = next_tag;
num_tags++;
next_tag++;
}
}
else
{
assert(!IS_TAG(lit));
}
break;
}
case CATENATION:
{
tre_catenation_t *cat = node->obj;
tre_ast_node_t *left = cat->left;
tre_ast_node_t *right = cat->right;
int reserved_tag = -1;
DPRINT(("Catenation, next_tag = %d\n", next_tag));
STACK_PUSHX(stack, voidptr, node);
STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
STACK_PUSHX(stack, voidptr, right);
STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
STACK_PUSHX(stack, int, next_tag + left->num_tags);
DPRINT((" Pushing %d for after left\n",
next_tag + left->num_tags));
if (left->num_tags > 0 && right->num_tags > 0)
{
DPRINT((" ADDTAGS_RECURSE:CATENATION num_tags++ "
"Reserving next_tag %d to right child\n",
next_tag));
reserved_tag = next_tag;
next_tag++;
}
STACK_PUSHX(stack, int, reserved_tag);
STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
STACK_PUSHX(stack, voidptr, left);
STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
}
break;
case ITERATION:
{
tre_iteration_t *iter = node->obj;
DPRINT(("Iteration\n"));
if (first_pass)
STACK_PUSHX(stack, int, regset_contains != 0);
STACK_PUSHX(stack, int, tag);
STACK_PUSHX(stack, voidptr, node);
STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
STACK_PUSHX(stack, voidptr, iter->arg);
STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
if (regset_contains)
{
if (!first_pass)
{
status = tre_merge_branches(mem, node, NULL, tag,
tnfa->num_tags);
if (status != REG_OK)
break;
status = tre_add_tag_left(mem, node, tag);
if (status != REG_OK)
break;
if (regset_contains == REGSET_HAS_STARTS && tag != 0)
tnfa->tag_directions[tag] = iter->minimal ?
TRE_TAG_MINIMIZE :
TRE_TAG_LEFT_MAXIMIZE;
else
tnfa->tag_directions[tag] = direction;
DPRINT(("Setting t%d direction to %s\n", tag,
tag_dir_str[tnfa->tag_directions[tag]]));
tre_purge_regset(regset, tnfa, tag);
}
DPRINT((" ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
tag));
regset[0] = REGSET_UNSET;
regset_contains = 0;
tag = next_tag;
num_tags++;
next_tag++;
}
direction = TRE_TAG_LEFT_MAXIMIZE;
DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
}
break;
case UNION:
{
tre_union_t *uni;
tre_ast_node_t *left;
tre_ast_node_t *right;
int front_tag = -1;
DPRINT(("Union\n"));
if (regset_contains)
{
DPRINT((" UNION num_tags++ tag=%d\n", tag));
front_tag = tag;
tag = next_tag;
num_tags++;
next_tag++;
}
if (top_union &&
(node->num_submatches -
(node->submatch_id >= 0 &&
node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
{
tre_ast_node_t *n;
int last = tre_stack_num_objects(stack);
STACK_PUSH(stack, voidptr, node);
STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
while (tre_stack_num_objects(stack) > last)
{
symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
switch (symbol)
{
case ADDTAGS_UNION_RECURSE:
n = tre_stack_pop_voidptr(stack);
uni = n->obj;
left = uni->left;
n->make_branches = 1;
STACK_PUSH(stack, voidptr, n);
STACK_PUSH(stack, int,
ADDTAGS_UNION_RIGHT_RECURSE);
if (left->type == UNION)
{
STACK_PUSH(stack, voidptr, left);
STACK_PUSH(stack, int,
ADDTAGS_UNION_RECURSE);
}
else
{
DPRINT((" ADDTAGS_UNION_RECURSE "
"num_tags++ tag=%d\n", tag));
uni->left_tag = tag;
tag = next_tag;
num_tags++;
next_tag++;
}
break;
case ADDTAGS_UNION_RIGHT_RECURSE:
n = tre_stack_pop_voidptr(stack);
uni = n->obj;
right = uni->right;
if (right->type == UNION)
{
STACK_PUSH(stack, voidptr, right);
STACK_PUSH(stack, int,
ADDTAGS_UNION_RECURSE);
}
else
{
DPRINT((" ADDTAGS_UNION_RIGHT_RECURSE "
"num_tags++ tag=%d\n", tag));
uni->right_tag = tag;
tag = next_tag;
num_tags++;
next_tag++;
}
break;
default:
assert(0);
break;
}
}
if (!first_pass)
{
STACK_PUSHX(stack, int, front_tag);
STACK_PUSHX(stack, voidptr, node);
STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
}
}
uni = node->obj;
left = uni->left;
right = uni->right;
STACK_PUSHX(stack, voidptr, regset);
STACK_PUSHX(stack, int, regset_contains != 0);
STACK_PUSHX(stack, voidptr, node);
STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
STACK_PUSHX(stack, voidptr, right);
STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
STACK_PUSHX(stack, voidptr, left);
STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
if (regset_contains)
{
if (!first_pass)
{
status = tre_merge_branches(mem, node, NULL, front_tag,
tnfa->num_tags);
if (status != REG_OK)
break;
status = tre_add_tag_left(mem, node, front_tag);
if (status != REG_OK)
break;
if (regset_contains == REGSET_HAS_STARTS)
tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
else
tnfa->tag_directions[front_tag] = direction;
DPRINT(("Setting t%d direction to %s\n", front_tag,
tag_dir_str[tnfa->tag_directions[front_tag]]));
tre_purge_regset(regset, tnfa, front_tag);
}
regset[0] = REGSET_UNSET;
regset_contains = 0;
}
break;
}
}
break;
case ADDTAGS_AFTER_ITERATION:
{
tre_iteration_t *iter;
tre_ast_node_t *orig;
int enter_tag;
node = tre_stack_pop_voidptr(stack);
orig = node->original ? node->original : node;
iter = (tre_iteration_t *)orig->obj;
enter_tag = tre_stack_pop_int(stack);
if (iter->minimal)
minimal_tag = enter_tag;
DPRINT(("After iteration\n"));
if (first_pass)
{
node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
DPRINT((" ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
node->num_tags));
}
else
{
tre_last_matched_branch_pre_t *b =
iter->arg->last_matched_branch;
if (b && b->n_tags > 0)
{
tre_last_matched_pre_t *u;
bit_ffs(b->tags, num_tags, &b->cmp_tag);
DPRINT((" ADDTAGS_AFTER_ITERATION: n_tags=%d "
"cmp_tag = %d\n", b->n_tags, b->cmp_tag));
u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
sizeof(tre_last_matched_branch_pre_t)
+ bitstr_size(tnfa->num_tags));
if (!u)
{
status = REG_ESPACE;
break;
}
u->branches = b;
u->n_branches = 1;
u->start_tag = b->cmp_tag;
u->tot_branches = b->tot_branches;
u->tot_last_matched = 1 + b->tot_last_matched;
u->tot_tags = b->tot_tags;
b = (tre_last_matched_branch_pre_t *)(u + 1);
b->last_matched = u;
b->n_last_matched = 1;
b->tot_branches = 1 + u->tot_branches;
b->tot_last_matched = u->tot_last_matched;
b->tot_tags = u->tot_tags;
iter->arg->last_matched_branch = b;
}
status = tre_merge_branches(mem, node, iter->arg, 0,
tnfa->num_tags);
if (status != REG_OK)
break;
if (iter->minimal)
{
if (iter->min == 0 && iter->max <= 1)
{
tre_ast_node_t *u, *e;
e = tre_ast_new_literal(mem, EMPTY, -1, -1);
if (e == NULL)
{
status = REG_ESPACE;
break;
}
u = tre_ast_new_union(mem, e, iter->arg);
if (u == NULL)
{
status = REG_ESPACE;
break;
}
iter->arg = u;
}
direction = TRE_TAG_MINIMIZE;
}
else
direction = TRE_TAG_MAXIMIZE;
DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
}
break;
}
case ADDTAGS_AFTER_CAT_LEFT:
{
int new_tag = tre_stack_pop_int(stack);
next_tag = tre_stack_pop_int(stack);
DPRINT(("After cat left, tag = %d, next_tag = %d\n",
tag, next_tag));
if (new_tag >= 0)
{
DPRINT((" Setting tag to %d\n", new_tag));
tag = new_tag;
}
break;
}
case ADDTAGS_AFTER_CAT_RIGHT:
{
tre_catenation_t *cat;
DPRINT(("After cat right\n"));
node = tre_stack_pop_voidptr(stack);
cat = node->obj;
if (first_pass)
{
node->num_tags = cat->left->num_tags + cat->right->num_tags;
DPRINT((" ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
node->num_tags));
}
else
{
status = tre_merge_branches(mem, cat->left, cat->right, 0,
tnfa->num_tags);
if (status != REG_OK)
break;
status = tre_merge_branches(mem, node, cat->left, 0,
tnfa->num_tags);
}
break;
}
case ADDTAGS_AFTER_UNION_LEFT:
DPRINT(("After union left\n"));
while (*regset != REGSET_UNSET)
regset++;
regset_contains = 0;
break;
case ADDTAGS_AFTER_UNION_RIGHT:
{
int added_tags;
tre_ast_node_t *orig;
tre_union_t *uni;
DPRINT(("After union right\n"));
node = tre_stack_pop_voidptr(stack);
orig = node->original ? node->original : node;
uni = (tre_union_t *)orig->obj;
added_tags = tre_stack_pop_int(stack);
if (first_pass)
{
node->num_tags = uni->left->num_tags + uni->right->num_tags
+ added_tags;
if (uni->left_tag > 0)
node->num_tags++;
if (uni->right_tag > 0)
node->num_tags++;
DPRINT((" ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
node->num_tags));
}
regset = tre_stack_pop_voidptr(stack);
if (!first_pass && node->make_branches)
{
tre_last_matched_branch_pre_t *lb =
uni->left->last_matched_branch;
tre_last_matched_branch_pre_t *rb =
uni->right->last_matched_branch;
tre_last_matched_pre_t *lu =
uni->left->last_matched_in_progress;
tre_last_matched_pre_t *ru =
uni->right->last_matched_in_progress;
tre_last_matched_pre_t *u;
if (uni->left_tag > 0)
{
DPRINT(("Setting t%d direction to maximize\n",
uni->left_tag));
status = tre_add_tag_right(mem, uni->left, uni->left_tag);
if (status != REG_OK)
break;
tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
if (!lb)
{
status = tre_merge_branches(mem, uni->left, NULL, -1,
tnfa->num_tags);
if (status != REG_OK)
break;
lb = uni->left->last_matched_branch;
}
lb->cmp_tag = uni->left_tag;
}
if (uni->right_tag > 0)
{
DPRINT(("Setting t%d direction to maximize\n",
uni->right_tag));
status = tre_add_tag_right(mem, uni->right, uni->right_tag);
if (status != REG_OK)
break;
tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
if (!rb)
{
status = tre_merge_branches(mem, uni->right, NULL, -1,
tnfa->num_tags);
if (status != REG_OK)
break;
rb = uni->right->last_matched_branch;
}
rb->cmp_tag = uni->right_tag;
}
if (lu == NULL)
{
if (ru == NULL)
{
u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
if (!u)
{
status = REG_ESPACE;
break;
}
u->tot_last_matched = 1;
if (lb)
{
u->branches = lb;
u->n_branches = 1;
u->tot_branches += lb->tot_branches;
u->tot_last_matched += lb->tot_last_matched;
u->tot_tags += lb->tot_tags;
if (rb)
{
lb->next = rb;
u->n_branches++;
u->tot_branches += rb->tot_branches;
u->tot_last_matched += rb->tot_last_matched;
u->tot_tags += rb->tot_tags;
}
}
else if (rb)
{
u->branches = rb;
u->n_branches = 1;
u->tot_branches += rb->tot_branches;
u->tot_last_matched += rb->tot_last_matched;
u->tot_tags += rb->tot_tags;
}
}
else
{
u = ru;
if (lb)
{
lb->next = u->branches;
u->branches = lb;
u->n_branches++;
u->tot_branches += lb->tot_branches;
u->tot_last_matched += lb->tot_last_matched;
u->tot_tags += lb->tot_tags;
}
}
}
else if (ru == NULL)
{
u = lu;
if (rb)
{
rb->next = u->branches;
u->branches = rb;
u->n_branches++;
u->tot_branches += rb->tot_branches;
u->tot_last_matched += rb->tot_last_matched;
u->tot_tags += rb->tot_tags;
}
}
else
{
if (lu->branches)
{
if (ru->branches)
{
tre_last_matched_branch_pre_t *b = lu->branches;
while (b->next) b = b->next;
b->next = ru->branches;
lu->n_branches += ru->n_branches;
}
}
else if (ru->branches)
{
lu->branches = ru->branches;
lu->n_branches = ru->n_branches;
}
lu->tot_branches += ru->tot_branches;
lu->tot_last_matched += ru->tot_last_matched - 1;
lu->tot_tags += ru->tot_tags;
u = lu;
}
node->last_matched_in_progress = u;
}
direction = TRE_TAG_MAXIMIZE;
break;
}
case ADDTAGS_AFTER_UNION_TOP:
{
tre_last_matched_branch_pre_t *b;
tre_last_matched_pre_t *u;
int start_tag;
DPRINT(("After union top\n"));
node = tre_stack_pop_voidptr(stack);
start_tag = tre_stack_pop_int(stack);
b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
+ bitstr_size(tnfa->num_tags));
if (!b)
{
status = REG_ESPACE;
break;
}
u = node->last_matched_in_progress;
u->start_tag = start_tag;
b->tot_branches = 1 + u->tot_branches;
b->tot_last_matched = u->tot_last_matched;
b->tot_tags = u->tot_tags;
b->last_matched = u;
b->n_last_matched = 1;
node->last_matched_branch = b;
node->last_matched_in_progress = NULL;
break;
}
default:
assert(0);
break;
}
}
if (status != REG_OK)
{
DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
goto error_post_compile;
}
if (!first_pass)
{
int i;
if (num_tags != tnfa->num_tags)
{
DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
tnfa->num_tags));
status = REG_BADPAT;
goto error_post_compile;
}
tre_purge_regset(regset, tnfa, tag);
DPRINT(("Setting t%d to %s\n", num_tags,
tag_dir_str[direction]));
tnfa->tag_directions[num_tags] = direction;
if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
{
DPRINT(("Processed %d reorder tags instead of %d\n",
(int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
status = REG_BADPAT;
goto error_post_compile;
}
*rtp = -1;
#if TRE_DEBUG
if (reorder_tags[0] >= 0)
{
DPRINT(("reorder_tags:\n"));
for (rtp = reorder_tags; *rtp >= 0;)
{
DPRINT(("%d after ", *rtp++));
DPRINT(("%d\n", *rtp++));
}
}
else
DPRINT(("No reorder_tags\n"));
#endif
for (i = 0; i < num_tags; i++)
to_reorder[i] = i;
for (rtp = reorder_tags; *rtp >= 0;)
{
int j, high, low;
int ti = *rtp++;
if (ti >= num_tags)
{
DPRINT(("Skipping reorder of %d\n", ti));
rtp++;
continue;
}
high = to_reorder[ti];
low = to_reorder[*rtp++];
DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
if (low > high)
{
DPRINT(("Tag %d already before %d\n", high, low));
continue;
}
for (j = 0; j < num_tags; j++)
if (to_reorder[j] > low && to_reorder[j] < high)
to_reorder[j]++;
to_reorder[ti] = low + 1;
#ifdef TRE_DEBUG
DPRINT(("to_reorder=("));
for (j = 0; j < num_tags; j++)
{
DPRINT(("%d", to_reorder[j]));
if (j < num_tags - 1)
DPRINT((","));
}
DPRINT((")\n"));
#endif
}
{
int need_reorder = 0;
for (i = 0; i < num_tags; i++)
if(to_reorder[i] != i)
{
need_reorder = 1;
break;
}
if (!need_reorder)
{
DPRINT(("Don't need to reorder\n"));
xfree(reorder_tags);
reorder_tags = NULL;
}
}
}
if (reorder_tags)
{
int i;
tre_tag_direction_t *new_tag_directions;
#if TRE_DEBUG
DPRINT(("to_reorder:"));
for (i = 0; i < num_tags; i++)
DPRINT((" %d->%d", i, to_reorder[i]));
DPRINT(("\n"));
#endif
DPRINT(("Reordering submatch_data\n"));
for (i = 0; i < (int)tnfa->num_submatches; i++)
{
#if TRE_DEBUG
int so = tnfa->submatch_data[i].so_tag;
int eo = tnfa->submatch_data[i].eo_tag;
#endif
tnfa->submatch_data[i].so_tag =
to_reorder[tnfa->submatch_data[i].so_tag];
tnfa->submatch_data[i].eo_tag =
tnfa->submatch_data[i].eo_tag < num_tags ?
to_reorder[tnfa->submatch_data[i].eo_tag] :
tnfa->submatch_data[i].eo_tag;
DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
tnfa->submatch_data[i].so_tag,
tnfa->submatch_data[i].eo_tag));
}
DPRINT(("Reordering tag_directions\n"));
new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
if (new_tag_directions == NULL)
{
status = REG_ESPACE;
goto error_post_compile;
}
for (i = 0; i < num_tags; i++)
{
new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
}
#if TRE_DEBUG
for (i = 0; i < num_tags; i++)
{
DPRINT(("t%d %s->%s\n", i,
tag_dir_str[tnfa->tag_directions[i]],
tag_dir_str[new_tag_directions[i]]));
}
DPRINT(("t%d %s->%s\n", num_tags,
tag_dir_str[tnfa->tag_directions[num_tags]],
tag_dir_str[tnfa->tag_directions[num_tags]]));
#endif
memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
xfree(new_tag_directions);
DPRINT(("Reordering minimal_tags\n"));
for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
to_reorder[tnfa->minimal_tags[i]] :
tnfa->minimal_tags[i];
DPRINT(("Reordering AST tags\n"));
STACK_PUSH(stack, voidptr, tree);
while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
{
node = tre_stack_pop_voidptr(stack);
switch (node->type)
{
case LITERAL:
{
tre_literal_t *lit = (tre_literal_t *)node->obj;
if (IS_TAG(lit))
lit->code_max = to_reorder[lit->code_max];
break;
}
case UNION:
{
tre_union_t *uni = (tre_union_t *)node->obj;
STACK_PUSHX(stack, voidptr, uni->right);
STACK_PUSHX(stack, voidptr, uni->left);
break;
}
case CATENATION:
{
tre_catenation_t *cat = (tre_catenation_t *)node->obj;
STACK_PUSHX(stack, voidptr, cat->right);
STACK_PUSHX(stack, voidptr, cat->left);
break;
}
case ITERATION:
{
tre_iteration_t *iter = (tre_iteration_t *)node->obj;
STACK_PUSHX(stack, voidptr, iter->arg);
break;
}
default:
assert(0);
break;
}
}
if (status != REG_OK)
{
DPRINT(("Error while reordering tags\n"));
goto error_post_compile;
}
}
if (!first_pass)
{
if (tree->last_matched_branch)
{
tre_last_matched_branch_t *buf, *b, *bb;
tre_last_matched_branch_pre_t *bp;
tre_last_matched_t *u, *uu;
tre_last_matched_pre_t *up;
int *t;
int i;
#ifdef TRE_DEBUG
tre_last_matched_branch_t *_b;
tre_last_matched_t *_u;
int *_t;
DPRINT(("last_match_branch_pre:\n"));
print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
#endif
buf = (tre_last_matched_branch_t *)xcalloc(1,
tree->last_matched_branch->tot_branches
* sizeof(tre_last_matched_branch_t) +
tree->last_matched_branch->tot_last_matched
* sizeof(tre_last_matched_t) +
tree->last_matched_branch->tot_tags *
sizeof(int));
if (!buf)
{
status = REG_ESPACE;
goto error_post_compile;
}
b = buf;
u = (tre_last_matched_t *)(b +
tree->last_matched_branch->tot_branches);
t = (int *)(u + tree->last_matched_branch->tot_last_matched);
#ifdef TRE_DEBUG
_b = b;
_u = u;
_t = t;
#endif
DPRINT(("Copying info_pre to info\n"));
STACK_PUSH(stack, voidptr, tree->last_matched_branch);
STACK_PUSH(stack, int, 1);
STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
{
switch (tre_stack_pop_int(stack))
{
case COPY_LAST_MATCHED_BRANCH:
i = tre_stack_pop_int(stack);
STACK_PUSHX(stack, voidptr, b);
STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
b += i;
break;
case COPY_LAST_MATCHED_BRANCH_NEXT:
bb = tre_stack_pop_voidptr(stack);
bp = tre_stack_pop_voidptr(stack);
bb->n_last_matched = bp->n_last_matched;
bb->cmp_tag = bp->cmp_tag;
if (bp->n_tags > 0)
{
int n;
n = bb->n_tags = bp->n_tags;
bb->tags = t;
for (i = 0; i < num_tags; i++)
if (bit_test(bp->tags, i))
{
*t++ = i;
if (--n <= 0)
break;
}
}
if (bp->next)
{
STACK_PUSHX(stack, voidptr, bp->next);
STACK_PUSHX(stack, voidptr, bb + 1);
STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
}
if (bp->n_last_matched > 0)
{
bb->last_matched = u;
STACK_PUSHX(stack, voidptr, bp->last_matched);
STACK_PUSHX(stack, int, bp->n_last_matched);
STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
}
break;
case COPY_LAST_MATCHED:
i = tre_stack_pop_int(stack);
STACK_PUSHX(stack, voidptr, u);
STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
u += i;
break;
case COPY_LAST_MATCHED_NEXT:
uu = tre_stack_pop_voidptr(stack);
up = tre_stack_pop_voidptr(stack);
uu->n_branches = up->n_branches;
uu->branches = b;
uu->start_tag = up->start_tag;
if (up->next)
{
STACK_PUSHX(stack, voidptr, up->next);
STACK_PUSHX(stack, voidptr, uu + 1);
STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
}
STACK_PUSHX(stack, voidptr, up->branches);
STACK_PUSHX(stack, int, up->n_branches);
STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
break;
}
}
if (status != REG_OK)
goto error_post_compile;
#ifdef TRE_DEBUG
DPRINT(("last_matched_branch:\n"));
print_last_match_branch(buf, 0);
if (b != _b + tree->last_matched_branch->tot_branches)
DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
b, _b + tree->last_matched_branch->tot_branches));
if (u != _u + tree->last_matched_branch->tot_last_matched)
DPRINT(("u/%p != _u + "
"tree->last_matched_branch->tot_last_matched/%p\n",
u, _u + tree->last_matched_branch->tot_last_matched));
if (t != _t + tree->last_matched_branch->tot_tags)
DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
t, _t + tree->last_matched_branch->tot_tags));
#endif
tnfa->last_matched_branch = buf;
}
#ifdef TRE_DEBUG
else
DPRINT(("No last_match_branch_pre\n"));
#endif
}
DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
first_pass? "First pass" : "Second pass", num_tags));
#ifdef TRE_DEBUG
tre_ast_print(tree);
#endif
DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
num_tags));
assert(tree->num_tags == num_tags);
tnfa->end_tag = num_tags;
tnfa->num_tags = num_tags;
tnfa->num_minimals = num_minimals;
error_post_compile:
xfree(reorder_tags);
error_reorder_tags:
xfree(orig_regset);
error_regset:
return status;
}
typedef enum {
COPY_RECURSE,
COPY_SET_RESULT_PTR
} tre_copyast_symbol_t;
#define COPY_REMOVE_TAGS 1
#define COPY_MAXIMIZE_FIRST_TAG 2
static reg_errcode_t
tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
int flags, int *pos_add, tre_tag_direction_t *tag_directions,
tre_ast_node_t **copy, int *max_pos)
{
reg_errcode_t status = REG_OK;
int bottom = tre_stack_num_objects(stack);
int num_copied = 0;
int first_tag = 1;
tre_ast_node_t **result = copy;
tre_copyast_symbol_t symbol;
STACK_PUSH(stack, voidptr, ast);
STACK_PUSH(stack, int, COPY_RECURSE);
while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
{
tre_ast_node_t *node;
if (status != REG_OK)
break;
symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
switch (symbol)
{
case COPY_SET_RESULT_PTR:
result = tre_stack_pop_voidptr(stack);
break;
case COPY_RECURSE:
node = tre_stack_pop_voidptr(stack);
switch (node->type)
{
case LITERAL:
{
tre_literal_t *lit = node->obj;
int pos = lit->position;
int min = lit->code_min;
int max = lit->code_max;
tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
lit->u.bracket_match_list :
NULL;
if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
{
pos += *pos_add;
num_copied++;
}
else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
{
min = EMPTY;
max = pos = -1;
}
else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
&& first_tag)
{
if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
tag_directions[max] = TRE_TAG_MAXIMIZE;
first_tag = 0;
}
*result = tre_ast_new_literal(mem, min, max, pos);
if (*result == NULL)
status = REG_ESPACE;
if (pos > *max_pos)
*max_pos = pos;
if (!IS_SPECIAL(lit))
((tre_literal_t *)(*result)->obj)->u.bracket_match_list
= list;
break;
}
case UNION:
{
tre_union_t *uni = node->obj;
tre_union_t *tmp;
*result = tre_ast_new_union(mem, uni->left, uni->right);
if (*result == NULL)
{
status = REG_ESPACE;
break;
}
tmp = (*result)->obj;
result = &tmp->left;
STACK_PUSHX(stack, voidptr, uni->right);
STACK_PUSHX(stack, int, COPY_RECURSE);
STACK_PUSHX(stack, voidptr, &tmp->right);
STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
STACK_PUSHX(stack, voidptr, uni->left);
STACK_PUSHX(stack, int, COPY_RECURSE);
break;
}
case CATENATION:
{
tre_catenation_t *cat = node->obj;
tre_catenation_t *tmp;
*result = tre_ast_new_catenation(mem, cat->left, cat->right);
if (*result == NULL)
{
status = REG_ESPACE;
break;
}
tmp = (*result)->obj;
tmp->left = NULL;
tmp->right = NULL;
result = &tmp->left;
STACK_PUSHX(stack, voidptr, cat->right);
STACK_PUSHX(stack, int, COPY_RECURSE);
STACK_PUSHX(stack, voidptr, &tmp->right);
STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
STACK_PUSHX(stack, voidptr, cat->left);
STACK_PUSHX(stack, int, COPY_RECURSE);
break;
}
case ITERATION:
{
tre_iteration_t *iter = node->obj;
STACK_PUSHX(stack, voidptr, iter->arg);
STACK_PUSHX(stack, int, COPY_RECURSE);
*result = tre_ast_new_iter(mem, iter->arg, iter->min,
iter->max, iter->minimal);
if (*result == NULL)
{
status = REG_ESPACE;
break;
}
iter = (*result)->obj;
result = &iter->arg;
break;
}
default:
assert(0);
break;
}
break;
}
}
*pos_add += num_copied;
return status;
}
typedef enum {
EXPAND_RECURSE,
EXPAND_AFTER_ITER
} tre_expand_ast_symbol_t;
static reg_errcode_t
tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
int *position, tre_tag_direction_t *tag_directions,
int __unused *max_depth)
{
reg_errcode_t status = REG_OK;
int bottom = tre_stack_num_objects(stack);
int pos_add = 0;
int pos_add_total = 0;
int max_pos = 0;
#ifdef TRE_APPROX
int params[TRE_PARAM_LAST];
int params_depth = 0;
#endif
int iter_depth = 0;
#ifdef TRE_APPROX
int i;
#endif
#ifdef TRE_APPROX
for (i = 0; i < TRE_PARAM_LAST; i++)
params[i] = TRE_PARAM_DEFAULT;
#endif
STACK_PUSHR(stack, voidptr, ast);
STACK_PUSHR(stack, int, EXPAND_RECURSE);
while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
{
tre_ast_node_t *node;
tre_expand_ast_symbol_t symbol;
if (status != REG_OK)
break;
DPRINT(("pos_add %d\n", pos_add));
symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
node = tre_stack_pop_voidptr(stack);
switch (symbol)
{
case EXPAND_RECURSE:
switch (node->type)
{
case LITERAL:
{
tre_literal_t *lit= node->obj;
if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
{
lit->position += pos_add;
if (lit->position > max_pos)
max_pos = lit->position;
}
break;
}
case UNION:
{
tre_union_t *uni = node->obj;
STACK_PUSHX(stack, voidptr, uni->right);
STACK_PUSHX(stack, int, EXPAND_RECURSE);
STACK_PUSHX(stack, voidptr, uni->left);
STACK_PUSHX(stack, int, EXPAND_RECURSE);
break;
}
case CATENATION:
{
tre_catenation_t *cat = node->obj;
STACK_PUSHX(stack, voidptr, cat->right);
STACK_PUSHX(stack, int, EXPAND_RECURSE);
STACK_PUSHX(stack, voidptr, cat->left);
STACK_PUSHX(stack, int, EXPAND_RECURSE);
break;
}
case ITERATION:
{
tre_iteration_t *iter = node->obj;
STACK_PUSHX(stack, int, pos_add);
STACK_PUSHX(stack, voidptr, node);
STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
STACK_PUSHX(stack, voidptr, iter->arg);
STACK_PUSHX(stack, int, EXPAND_RECURSE);
if (iter->min > 1 || iter->max > 1)
pos_add = 0;
iter_depth++;
DPRINT(("iter\n"));
break;
}
default:
assert(0);
break;
}
break;
case EXPAND_AFTER_ITER:
{
tre_iteration_t *iter = node->obj;
int pos_add_last;
pos_add = tre_stack_pop_int(stack);
pos_add_last = pos_add;
if (iter->min == 0 && iter->max == 0)
{
tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
if (empty == NULL)
return REG_ESPACE;
node->obj = empty->obj;
node->type = empty->type;
}
else if (iter->min > 1 || iter->max > 1)
{
tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
int j;
int pos_add_save = pos_add;
for (j = 0; j < iter->min; j++)
{
tre_ast_node_t *copy;
int flags = ((j + 1 < iter->min)
? COPY_REMOVE_TAGS
: COPY_MAXIMIZE_FIRST_TAG);
DPRINT((" pos_add %d\n", pos_add));
pos_add_save = pos_add;
status = tre_copy_ast(mem, stack, iter->arg, flags,
&pos_add, tag_directions, ©,
&max_pos);
if (status != REG_OK)
return status;
if (seq1 != NULL)
seq1 = tre_ast_new_catenation(mem, seq1, copy);
else
seq1 = copy;
if (seq1 == NULL)
return REG_ESPACE;
}
if (iter->max == -1)
{
pos_add_save = pos_add;
status = tre_copy_ast(mem, stack, iter->arg, 0,
&pos_add, NULL, &seq2, &max_pos);
if (status != REG_OK)
return status;
seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
if (seq2 == NULL)
return REG_ESPACE;
}
else
{
for (j = iter->min; j < iter->max; j++)
{
tre_ast_node_t *tmp, *copy;
pos_add_save = pos_add;
status = tre_copy_ast(mem, stack, iter->arg, 0,
&pos_add, NULL, ©, &max_pos);
if (status != REG_OK)
return status;
if (seq2 != NULL)
seq2 = tre_ast_new_catenation(mem, copy, seq2);
else
seq2 = copy;
if (seq2 == NULL)
return REG_ESPACE;
tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
if (tmp == NULL)
return REG_ESPACE;
seq2 = tre_ast_new_union(mem, tmp, seq2);
if (seq2 == NULL)
return REG_ESPACE;
}
}
pos_add = pos_add_save;
if (seq1 == NULL)
seq1 = seq2;
else if (seq2 != NULL)
seq1 = tre_ast_new_catenation(mem, seq1, seq2);
if (seq1 == NULL)
return REG_ESPACE;
node->obj = seq1->obj;
node->type = seq1->type;
}
iter_depth--;
pos_add_total += pos_add - pos_add_last;
if (iter_depth == 0)
pos_add = pos_add_total;
#ifdef TRE_APPROX
if (iter->params)
{
tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
int *old_params;
tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
if (!tmp_l)
return REG_ESPACE;
((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
if (!tmp_r)
return REG_ESPACE;
old_params = tre_mem_alloc(mem, sizeof(*old_params)
* TRE_PARAM_LAST);
if (!old_params)
return REG_ESPACE;
for (i = 0; i < TRE_PARAM_LAST; i++)
old_params[i] = params[i];
((tre_literal_t *)tmp_r->obj)->u.params = old_params;
old_params[TRE_PARAM_DEPTH] = params_depth;
node_copy = tre_ast_new_node(mem, ITERATION,
sizeof(tre_iteration_t));
if (!node_copy)
return REG_ESPACE;
node_copy->obj = node->obj;
tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
if (!tmp_node)
return REG_ESPACE;
tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
if (!tmp_node)
return REG_ESPACE;
memcpy(node, tmp_node, sizeof(*node));
node->obj = tmp_node->obj;
node->type = tmp_node->type;
params_depth++;
if (params_depth > *max_depth)
*max_depth = params_depth;
}
#endif
break;
}
default:
assert(0);
break;
}
}
*position += pos_add_total;
if (max_pos > *position)
*position = max_pos;
#ifdef TRE_DEBUG
DPRINT(("Expanded AST:\n"));
tre_ast_print(ast);
DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
#endif
return status;
}
static tre_pos_and_tags_t *
tre_set_empty(tre_mem_t mem)
{
tre_pos_and_tags_t *new_set;
new_set = tre_mem_calloc(mem, sizeof(*new_set));
if (new_set == NULL)
return NULL;
new_set[0].position = -1;
new_set[0].code_min = -1;
new_set[0].code_max = -1;
return new_set;
}
static tre_pos_and_tags_t *
tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
tre_bracket_match_list_t *bracket_match_list, int backref)
{
tre_pos_and_tags_t *new_set;
new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
if (new_set == NULL)
return NULL;
new_set[0].position = position;
new_set[0].code_min = code_min;
new_set[0].code_max = code_max;
new_set[0].bracket_match_list = bracket_match_list;
new_set[0].backref = backref;
new_set[1].position = -1;
new_set[1].code_min = -1;
new_set[1].code_max = -1;
return new_set;
}
static tre_pos_and_tags_t *
tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
int *tags, int assertions, int *params)
{
int s1, s2, i, j;
tre_pos_and_tags_t *new_set;
int *new_tags;
int num_tags;
for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
for (s1 = 0; set1[s1].position >= 0; s1++);
for (s2 = 0; set2[s2].position >= 0; s2++);
new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
if (!new_set )
return NULL;
for (s1 = 0; set1[s1].position >= 0; s1++)
{
new_set[s1].position = set1[s1].position;
new_set[s1].code_min = set1[s1].code_min;
new_set[s1].code_max = set1[s1].code_max;
new_set[s1].assertions = set1[s1].assertions | assertions;
new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
new_set[s1].backref = set1[s1].backref;
if (set1[s1].tags == NULL && tags == NULL)
new_set[s1].tags = NULL;
else
{
for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
* (i + num_tags + 1)));
if (new_tags == NULL)
return NULL;
for (j = 0; j < i; j++)
new_tags[j] = set1[s1].tags[j];
for (i = 0; i < num_tags; i++)
new_tags[j + i] = tags[i];
new_tags[j + i] = -1;
new_set[s1].tags = new_tags;
}
if (set1[s1].params)
new_set[s1].params = set1[s1].params;
if (params)
{
if (!new_set[s1].params)
new_set[s1].params = params;
else
{
new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
TRE_PARAM_LAST);
if (!new_set[s1].params)
return NULL;
for (i = 0; i < TRE_PARAM_LAST; i++)
if (params[i] != TRE_PARAM_UNSET)
new_set[s1].params[i] = params[i];
}
}
}
for (s2 = 0; set2[s2].position >= 0; s2++)
{
new_set[s1 + s2].position = set2[s2].position;
new_set[s1 + s2].code_min = set2[s2].code_min;
new_set[s1 + s2].code_max = set2[s2].code_max;
new_set[s1 + s2].assertions = set2[s2].assertions;
new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
new_set[s1 + s2].backref = set2[s2].backref;
if (set2[s2].tags == NULL)
new_set[s1 + s2].tags = NULL;
else
{
for (i = 0; set2[s2].tags[i] >= 0; i++);
new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
if (new_tags == NULL)
return NULL;
for (j = 0; j < i; j++)
new_tags[j] = set2[s2].tags[j];
new_tags[j] = -1;
new_set[s1 + s2].tags = new_tags;
}
if (set2[s2].params)
new_set[s1 + s2].params = set2[s2].params;
if (params)
{
if (!new_set[s1 + s2].params)
new_set[s1 + s2].params = params;
else
{
new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
TRE_PARAM_LAST);
if (!new_set[s1 + s2].params)
return NULL;
for (i = 0; i < TRE_PARAM_LAST; i++)
if (params[i] != TRE_PARAM_UNSET)
new_set[s1 + s2].params[i] = params[i];
}
}
}
new_set[s1 + s2].position = -1;
return new_set;
}
static reg_errcode_t
tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
int *assertions, int *params, int *num_tags_seen,
int *params_seen)
{
tre_literal_t *lit;
tre_union_t *uni;
tre_catenation_t *cat;
tre_iteration_t *iter;
int i;
int bottom = tre_stack_num_objects(stack);
reg_errcode_t status = REG_OK;
if (num_tags_seen)
*num_tags_seen = 0;
if (params_seen)
*params_seen = 0;
status = tre_stack_push_voidptr(stack, node);
while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
{
node = tre_stack_pop_voidptr(stack);
switch (node->type)
{
case LITERAL:
lit = (tre_literal_t *)node->obj;
switch (lit->code_min)
{
case TAG:
if (lit->code_max >= 0)
{
if (tags != NULL)
{
for (i = 0; tags[i] >= 0; i++)
if (tags[i] == lit->code_max)
break;
if (tags[i] < 0)
{
tags[i] = lit->code_max;
tags[i + 1] = -1;
}
}
if (num_tags_seen)
(*num_tags_seen)++;
}
break;
case ASSERTION:
assert(lit->code_max >= 1
|| lit->code_max <= ASSERT_LAST);
if (assertions != NULL)
*assertions |= lit->code_max;
break;
case PARAMETER:
if (params != NULL)
for (i = 0; i < TRE_PARAM_LAST; i++)
params[i] = lit->u.params[i];
if (params_seen != NULL)
*params_seen = 1;
break;
case EMPTY:
break;
default:
assert(0);
break;
}
break;
case UNION:
uni = (tre_union_t *)node->obj;
if (uni->left->nullable)
STACK_PUSHX(stack, voidptr, uni->left)
else if (uni->right->nullable)
STACK_PUSHX(stack, voidptr, uni->right)
else
assert(0);
break;
case CATENATION:
cat = (tre_catenation_t *)node->obj;
assert(cat->left->nullable);
assert(cat->right->nullable);
STACK_PUSHX(stack, voidptr, cat->left);
STACK_PUSHX(stack, voidptr, cat->right);
break;
case ITERATION:
iter = (tre_iteration_t *)node->obj;
if (iter->arg->nullable)
STACK_PUSHX(stack, voidptr, iter->arg);
break;
default:
assert(0);
break;
}
}
return status;
}
typedef enum {
NFL_RECURSE,
NFL_POST_UNION,
NFL_POST_CATENATION,
NFL_POST_ITERATION
} tre_nfl_stack_symbol_t;
static reg_errcode_t
tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
{
int bottom = tre_stack_num_objects(stack);
STACK_PUSHR(stack, voidptr, tree);
STACK_PUSHR(stack, int, NFL_RECURSE);
while (tre_stack_num_objects(stack) > bottom)
{
tre_nfl_stack_symbol_t symbol;
tre_ast_node_t *node;
symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
node = tre_stack_pop_voidptr(stack);
switch (symbol)
{
case NFL_RECURSE:
switch (node->type)
{
case LITERAL:
{
tre_literal_t *lit = (tre_literal_t *)node->obj;
if (IS_BACKREF(lit))
{
node->nullable = 0;
node->firstpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, NULL, -1);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, NULL,
(int)lit->code_max);
if (!node->lastpos)
return REG_ESPACE;
}
else if (lit->code_min < 0)
{
node->nullable = 1;
node->firstpos = tre_set_empty(mem);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_empty(mem);
if (!node->lastpos)
return REG_ESPACE;
}
else
{
node->nullable = 0;
node->firstpos =
tre_set_one(mem, lit->position, (int)lit->code_min,
(int)lit->code_max, NULL, -1);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position,
(int)lit->code_min,
(int)lit->code_max,
lit->u.bracket_match_list,
-1);
if (!node->lastpos)
return REG_ESPACE;
}
break;
}
case UNION:
STACK_PUSHR(stack, voidptr, node);
STACK_PUSHR(stack, int, NFL_POST_UNION);
STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
STACK_PUSHR(stack, int, NFL_RECURSE);
STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
STACK_PUSHR(stack, int, NFL_RECURSE);
break;
case CATENATION:
STACK_PUSHR(stack, voidptr, node);
STACK_PUSHR(stack, int, NFL_POST_CATENATION);
STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
STACK_PUSHR(stack, int, NFL_RECURSE);
STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
STACK_PUSHR(stack, int, NFL_RECURSE);
break;
case ITERATION:
STACK_PUSHR(stack, voidptr, node);
STACK_PUSHR(stack, int, NFL_POST_ITERATION);
STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
STACK_PUSHR(stack, int, NFL_RECURSE);
break;
}
break;
case NFL_POST_UNION:
{
tre_union_t *uni = (tre_union_t *)node->obj;
node->nullable = uni->left->nullable || uni->right->nullable;
node->firstpos = tre_set_union(mem, uni->left->firstpos,
uni->right->firstpos, NULL, 0, NULL);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_union(mem, uni->left->lastpos,
uni->right->lastpos, NULL, 0, NULL);
if (!node->lastpos)
return REG_ESPACE;
break;
}
case NFL_POST_ITERATION:
{
int num_tags, *tags, assertions, params_seen;
int *params;
reg_errcode_t status;
tre_iteration_t *iter = (tre_iteration_t *)node->obj;
if (iter->min == 0 || iter->arg->nullable)
{
node->nullable = 1;
if (iter->arg->nullable)
{
status = tre_match_empty(stack, iter->arg,
NULL, NULL, NULL, &num_tags,
¶ms_seen);
if (status != REG_OK)
return status;
tags = xmalloc(sizeof(int) * (num_tags + 1));
if (!tags)
return REG_ESPACE;
tags[0] = -1;
assertions = 0;
params = NULL;
if (params_seen)
{
params = tre_mem_alloc(mem, sizeof(*params)
* TRE_PARAM_LAST);
if (!params)
{
xfree(tags);
return REG_ESPACE;
}
}
status = tre_match_empty(stack, iter->arg, tags,
&assertions, params, NULL, NULL);
if (status != REG_OK)
{
xfree(tags);
return status;
}
node->lastpos =
tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
tags, assertions, params);
xfree(tags);
if (!node->lastpos)
return REG_ESPACE;
}
else
node->lastpos = iter->arg->lastpos;
}
else
{
node->nullable = 0;
node->lastpos = iter->arg->lastpos;
}
node->firstpos = iter->arg->firstpos;
break;
}
case NFL_POST_CATENATION:
{
int num_tags, *tags, assertions, params_seen;
int *params;
reg_errcode_t status;
tre_catenation_t *cat = node->obj;
node->nullable = cat->left->nullable && cat->right->nullable;
if (cat->left->nullable)
{
status = tre_match_empty(stack, cat->left,
NULL, NULL, NULL, &num_tags,
¶ms_seen);
if (status != REG_OK)
return status;
tags = xmalloc(sizeof(*tags) * (num_tags + 1));
if (!tags)
return REG_ESPACE;
tags[0] = -1;
assertions = 0;
params = NULL;
if (params_seen)
{
params = tre_mem_alloc(mem, sizeof(*params)
* TRE_PARAM_LAST);
if (!params)
{
xfree(tags);
return REG_ESPACE;
}
}
status = tre_match_empty(stack, cat->left, tags,
&assertions, params, NULL, NULL);
if (status != REG_OK)
{
xfree(tags);
return status;
}
node->firstpos =
tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
tags, assertions, params);
xfree(tags);
if (!node->firstpos)
return REG_ESPACE;
}
else
{
node->firstpos = cat->left->firstpos;
}
if (cat->right->nullable)
{
status = tre_match_empty(stack, cat->right,
NULL, NULL, NULL, &num_tags,
¶ms_seen);
if (status != REG_OK)
return status;
tags = xmalloc(sizeof(int) * (num_tags + 1));
if (!tags)
return REG_ESPACE;
tags[0] = -1;
assertions = 0;
params = NULL;
if (params_seen)
{
params = tre_mem_alloc(mem, sizeof(*params)
* TRE_PARAM_LAST);
if (!params)
{
xfree(tags);
return REG_ESPACE;
}
}
status = tre_match_empty(stack, cat->right, tags,
&assertions, params, NULL, NULL);
if (status != REG_OK)
{
xfree(tags);
return status;
}
node->lastpos =
tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
tags, assertions, params);
xfree(tags);
if (!node->lastpos)
return REG_ESPACE;
}
else
{
node->lastpos = cat->right->lastpos;
}
break;
}
default:
assert(0);
break;
}
}
return REG_OK;
}
static reg_errcode_t
tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
tre_tnfa_transition_t *transitions,
int *counts, int *offs)
{
tre_pos_and_tags_t *orig_p2 = p2;
tre_tnfa_transition_t *trans;
int i, j, k, l, dup, prev_p2_pos;
if (transitions != NULL)
while (p1->position >= 0)
{
p2 = orig_p2;
prev_p2_pos = -1;
while (p2->position >= 0)
{
if (p2->position == prev_p2_pos)
{
p2++;
continue;
}
prev_p2_pos = p2->position;
trans = transitions + offs[p1->position];
while (trans->state != NULL)
{
#if 0
if (trans->state_id == p2->position)
{
DPRINT(("*"));
break;
}
#endif
trans++;
}
if (trans->state == NULL)
(trans + 1)->state = NULL;
trans->code_min = p1->code_min;
trans->code_max = p1->code_max;
trans->state = transitions + offs[p2->position];
trans->state_id = p2->position;
trans->assertions = p1->assertions | p2->assertions
| (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
if (p1->backref >= 0)
{
assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
assert(p2->backref < 0);
trans->u.backref = p1->backref;
trans->assertions |= ASSERT_BACKREF;
}
if (p1->bracket_match_list != NULL)
{
trans->u.bracket_match_list =
xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
if (trans->u.bracket_match_list == NULL)
return REG_ESPACE;
memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
}
i = 0;
if (p1->tags != NULL)
while(p1->tags[i] >= 0)
i++;
j = 0;
if (p2->tags != NULL)
while(p2->tags[j] >= 0)
j++;
if (trans->tags != NULL)
xfree(trans->tags);
trans->tags = NULL;
if (i + j > 0)
{
trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
if (!trans->tags)
return REG_ESPACE;
i = 0;
if (p1->tags != NULL)
while(p1->tags[i] >= 0)
{
trans->tags[i] = p1->tags[i];
i++;
}
l = i;
j = 0;
if (p2->tags != NULL)
while (p2->tags[j] >= 0)
{
dup = 0;
for (k = 0; k < i; k++)
if (trans->tags[k] == p2->tags[j])
{
dup = 1;
break;
}
if (!dup)
trans->tags[l++] = p2->tags[j];
j++;
}
trans->tags[l] = -1;
}
if (p1->params || p2->params)
{
if (!trans->params)
trans->params = xmalloc(sizeof(*trans->params)
* TRE_PARAM_LAST);
if (!trans->params)
return REG_ESPACE;
for (i = 0; i < TRE_PARAM_LAST; i++)
{
trans->params[i] = TRE_PARAM_UNSET;
if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
trans->params[i] = p1->params[i];
if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
trans->params[i] = p2->params[i];
}
}
else
{
if (trans->params)
xfree(trans->params);
trans->params = NULL;
}
#ifdef TRE_DEBUG
{
int *tags;
DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
p1->code_min));
if (p1->code_max != p1->code_min)
DPRINT(("-%3d", p1->code_max));
tags = trans->tags;
if (tags)
{
DPRINT((", tags ["));
while (*tags >= 0)
{
DPRINT(("%d", *tags));
tags++;
if (*tags >= 0)
DPRINT((","));
}
DPRINT(("]"));
}
if (trans->assertions)
DPRINT((", assert %d", trans->assertions));
if (trans->assertions & ASSERT_BACKREF)
DPRINT((", backref %d", trans->u.backref));
else if (trans->assertions & ASSERT_BRACKET_MATCH)
DPRINT((", bracket_match_list %p",
trans->u.bracket_match_list));
if (trans->params)
{
DPRINT((", "));
tre_print_params(trans->params);
}
DPRINT(("\n"));
}
#endif
p2++;
}
p1++;
}
else
while (p1->position >= 0)
{
p2 = orig_p2;
while (p2->position >= 0)
{
counts[p1->position]++;
p2++;
}
p1++;
}
return REG_OK;
}
static reg_errcode_t
tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
int *counts, int *offs)
{
tre_union_t *uni;
tre_catenation_t *cat;
tre_iteration_t *iter;
reg_errcode_t errcode = REG_OK;
switch (node->type)
{
case LITERAL:
break;
case UNION:
uni = (tre_union_t *)node->obj;
errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
if (errcode != REG_OK)
return errcode;
errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
break;
case CATENATION:
cat = (tre_catenation_t *)node->obj;
errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
transitions, counts, offs);
if (errcode != REG_OK)
return errcode;
errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
if (errcode != REG_OK)
return errcode;
errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
break;
case ITERATION:
iter = (tre_iteration_t *)node->obj;
assert(iter->max == -1 || iter->max == 1);
if (iter->max == -1)
{
assert(iter->min == 0 || iter->min == 1);
errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
transitions, counts, offs);
if (errcode != REG_OK)
return errcode;
}
errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
break;
}
return errcode;
}
#define ERROR_EXIT(err) \
do \
{ \
errcode = err; \
if (1) \
goto error_exit; \
} \
while (0)
int
tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
locale_t loc)
{
tre_stack_t *stack;
tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
tre_pos_and_tags_t *p;
int *counts = NULL, *offs = NULL;
int i, add = 0;
tre_tnfa_transition_t *transitions, *initial;
tre_tnfa_t *tnfa = NULL;
tre_submatch_data_t *submatch_data = NULL;
tre_tag_direction_t *tag_directions = NULL;
reg_errcode_t errcode;
tre_mem_t mem;
tre_parse_ctx_t parse_ctx;
stack = tre_stack_new(512, 10240, 128);
if (!stack)
return REG_ESPACE;
mem = tre_mem_new();
if (!mem)
{
tre_stack_destroy(stack);
return REG_ESPACE;
}
memset(&parse_ctx, 0, sizeof(parse_ctx));
parse_ctx.mem = mem;
parse_ctx.stack = stack;
parse_ctx.re = regex;
parse_ctx.len = n;
if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
cflags &= ~REG_UNGREEDY;
parse_ctx.cflags = cflags;
parse_ctx.max_backref = -1;
parse_ctx.loc = loc;
parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
errcode = tre_parse(&parse_ctx);
if (errcode != REG_OK)
ERROR_EXIT(errcode);
preg->re_nsub = parse_ctx.submatch_id - 1;
tree = parse_ctx.result;
if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
ERROR_EXIT(REG_BADPAT);
#ifdef TRE_DEBUG
tre_ast_print(tree);
#endif
if (parse_ctx.max_backref > (int)preg->re_nsub)
ERROR_EXIT(REG_ESUBREG);
tnfa = xcalloc(1, sizeof(tre_tnfa_t));
if (tnfa == NULL)
ERROR_EXIT(REG_ESPACE);
tnfa->have_backrefs = parse_ctx.max_backref >= 0;
tnfa->have_approx = parse_ctx.have_approx;
tnfa->num_submatches = parse_ctx.submatch_id;
tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
- SUBMATCH_ID_INVISIBLE_START;
tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
tnfa->loc = parse_ctx.loc;
if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
{
DPRINT(("tre_compile: setting up tags\n"));
errcode = tre_add_tags(NULL, stack, tree, tnfa);
if (errcode != REG_OK)
ERROR_EXIT(errcode);
#ifdef TRE_DEBUG
tre_ast_print(tree);
#endif
if (tnfa->num_tags > 0)
{
tag_directions = xmalloc(sizeof(*tag_directions)
* (tnfa->num_tags + 1));
if (tag_directions == NULL)
ERROR_EXIT(REG_ESPACE);
tnfa->tag_directions = tag_directions;
memset(tag_directions, -1,
sizeof(*tag_directions) * (tnfa->num_tags + 1));
}
tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
sizeof(*tnfa->minimal_tags));
if (tnfa->minimal_tags == NULL)
ERROR_EXIT(REG_ESPACE);
submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
sizeof(*submatch_data));
if (submatch_data == NULL)
ERROR_EXIT(REG_ESPACE);
for (i = 0; i < parse_ctx.submatch_id; i++)
{
submatch_data[i].eo_tag = -1;
}
tnfa->submatch_data = submatch_data;
errcode = tre_add_tags(mem, stack, tree, tnfa);
if (errcode != REG_OK)
ERROR_EXIT(errcode);
#ifdef TRE_DEBUG
for (i = 0; i < parse_ctx.submatch_id; i++)
DPRINT(("pmatch[%d] = {t%d, t%d}\n",
i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
for (i = 0; i <= tnfa->num_tags; i++)
DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
#endif
}
errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
tag_directions, &tnfa->params_depth);
if (errcode != REG_OK)
ERROR_EXIT(errcode);
tmp_ast_l = tree;
tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
if (tmp_ast_r == NULL)
ERROR_EXIT(REG_ESPACE);
tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
if (tree == NULL)
ERROR_EXIT(REG_ESPACE);
#ifdef TRE_DEBUG
tre_ast_print(tree);
DPRINT(("Number of states: %d\n", parse_ctx.position));
if (submatch_data)
for (i = 0; i < parse_ctx.submatch_id; i++)
DPRINT(("pmatch[%d] = {t%d, t%d}\n",
i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
if (tag_directions)
for (i = 0; i <= tnfa->num_tags; i++)
DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
#endif
errcode = tre_compute_nfl(mem, stack, tree);
if (errcode != REG_OK)
ERROR_EXIT(errcode);
counts = xmalloc(sizeof(int) * parse_ctx.position);
if (counts == NULL)
ERROR_EXIT(REG_ESPACE);
offs = xmalloc(sizeof(int) * parse_ctx.position);
if (offs == NULL)
ERROR_EXIT(REG_ESPACE);
for (i = 0; i < parse_ctx.position; i++)
counts[i] = 0;
tre_ast_to_tnfa(tree, NULL, counts, NULL);
add = 0;
for (i = 0; i < parse_ctx.position; i++)
{
offs[i] = add;
add += counts[i] + 1;
counts[i] = 0;
}
transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
if (transitions == NULL)
ERROR_EXIT(REG_ESPACE);
tnfa->transitions = transitions;
tnfa->num_transitions = add;
DPRINT(("Converting to TNFA:\n"));
errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
if (errcode != REG_OK)
ERROR_EXIT(errcode);
#ifdef USE_FIRSTPOS_CHARS
tnfa->first_char = -1;
if (TRE_MB_CUR_MAX_L(tnfa->loc) == 1 && !tmp_ast_l->nullable)
{
int count = 0;
tre_cint_t k;
DPRINT(("Characters that can start a match:"));
tnfa->firstpos_chars = xcalloc(256, sizeof(char));
if (tnfa->firstpos_chars == NULL)
ERROR_EXIT(REG_ESPACE);
for (p = tree->firstpos; p->position >= 0; p++)
{
tre_tnfa_transition_t *j = transitions + offs[p->position];
while (j->state != NULL)
{
for (k = j->code_min; k <= j->code_max && k < 256; k++)
{
DPRINT((" %d", k));
tnfa->firstpos_chars[k] = 1;
count++;
}
j++;
}
}
DPRINT(("\n"));
#define TRE_OPTIMIZE_FIRST_CHAR 1
#if TRE_OPTIMIZE_FIRST_CHAR
if (count == 1)
{
for (k = 0; k < 256; k++)
if (tnfa->firstpos_chars[k])
{
DPRINT(("first char must be %d\n", k));
tnfa->first_char = k;
xfree(tnfa->firstpos_chars);
tnfa->firstpos_chars = NULL;
break;
}
}
#endif
}
else
tnfa->firstpos_chars = NULL;
#else
tnfa->first_char = -1;
if (!tmp_ast_l->nullable)
{
int scanning = 1;
for (p = tree->firstpos; scanning && p->position >= 0; p++)
{
tre_tnfa_transition_t *j = transitions + offs[p->position];
while (j->state != NULL)
{
if (j->code_min <= j->code_max)
{
if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
{
tnfa->first_char = -1;
scanning = 0;
break;
}
tnfa->first_char = j->code_min;
}
j++;
}
}
#ifdef TRE_DEBUG
if (tnfa->first_char >= 0)
DPRINT(("first char must be %d\n", tnfa->first_char));
#endif
}
#endif
p = tree->firstpos;
i = 0;
while (p->position >= 0)
{
i++;
#ifdef TRE_DEBUG
{
int *tags;
DPRINT(("initial: %d", p->position));
tags = p->tags;
if (tags != NULL)
{
if (*tags >= 0)
DPRINT(("/"));
while (*tags >= 0)
{
DPRINT(("%d", *tags));
tags++;
if (*tags >= 0)
DPRINT((","));
}
}
DPRINT((", assert %d", p->assertions));
if (p->params)
{
DPRINT((", "));
tre_print_params(p->params);
}
DPRINT(("\n"));
}
#endif
p++;
}
initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
if (initial == NULL)
ERROR_EXIT(REG_ESPACE);
tnfa->initial = initial;
i = 0;
for (p = tree->firstpos; p->position >= 0; p++)
{
initial[i].state = transitions + offs[p->position];
initial[i].state_id = p->position;
initial[i].tags = NULL;
if (p->tags)
{
int j;
for (j = 0; p->tags[j] >= 0; j++);
initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
if (!initial[i].tags)
ERROR_EXIT(REG_ESPACE);
memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
}
initial[i].params = NULL;
if (p->params)
{
initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
if (!initial[i].params)
ERROR_EXIT(REG_ESPACE);
memcpy(initial[i].params, p->params,
sizeof(*p->params) * TRE_PARAM_LAST);
}
initial[i].assertions = p->assertions;
i++;
}
initial[i].state = NULL;
tnfa->num_transitions = add;
tnfa->final = transitions + offs[tree->lastpos[0].position];
tnfa->num_states = parse_ctx.position;
tnfa->cflags = cflags;
DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
(void *)tnfa->final));
tre_mem_destroy(mem);
tre_stack_destroy(stack);
xfree(counts);
xfree(offs);
#ifdef TRE_USE_SYSTEM_REGEX_H
preg->re_magic = RE_MAGIC;
#endif
preg->TRE_REGEX_T_FIELD = (void *)tnfa;
#ifdef __LIBC__
XL_RETAIN(tnfa->loc);
#endif
return REG_OK;
error_exit:
tre_mem_destroy(mem);
if (stack != NULL)
tre_stack_destroy(stack);
if (counts != NULL)
xfree(counts);
if (offs != NULL)
xfree(offs);
preg->TRE_REGEX_T_FIELD = (void *)tnfa;
#ifdef __LIBC__
if(tnfa) tnfa->loc = NULL;
#endif
tre_free(preg);
return errcode;
}
void
tre_free(regex_t *preg)
{
tre_tnfa_t *tnfa;
unsigned int i;
tre_tnfa_transition_t *trans;
#ifdef TRE_USE_SYSTEM_REGEX_H
preg->re_magic = 0;
#endif
tnfa = (void *)preg->TRE_REGEX_T_FIELD;
if (!tnfa)
return;
preg->TRE_REGEX_T_FIELD = NULL;
for (i = 0; i < tnfa->num_transitions; i++)
if (tnfa->transitions[i].state)
{
if (tnfa->transitions[i].tags)
xfree(tnfa->transitions[i].tags);
if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
xfree(tnfa->transitions[i].u.bracket_match_list);
if (tnfa->transitions[i].params)
xfree(tnfa->transitions[i].params);
}
if (tnfa->transitions)
xfree(tnfa->transitions);
if (tnfa->initial)
{
for (trans = tnfa->initial; trans->state; trans++)
{
if (trans->tags)
xfree(trans->tags);
if (trans->params)
xfree(trans->params);
}
xfree(tnfa->initial);
}
if (tnfa->submatch_data)
{
xfree(tnfa->submatch_data);
}
if (tnfa->tag_directions)
xfree(tnfa->tag_directions);
#ifdef USE_FIRSTPOS_CHARS
if (tnfa->firstpos_chars)
xfree(tnfa->firstpos_chars);
#endif
if (tnfa->minimal_tags)
xfree(tnfa->minimal_tags);
if (tnfa->loc)
#ifdef __LIBC__
XL_RELEASE(tnfa->loc);
#else
freelocale(tnfa->loc);
#endif
if (tnfa->last_matched_branch)
xfree(tnfa->last_matched_branch);
xfree(tnfa);
}
#ifndef __LIBC__
char *
tre_version(void)
{
static char str[256];
char *version;
if (str[0] == 0)
{
(void) tre_config(TRE_CONFIG_VERSION, &version);
(void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
}
return str;
}
int
tre_config(int query, void *result)
{
int *int_result = result;
const char **string_result = result;
switch (query)
{
case TRE_CONFIG_APPROX:
#ifdef TRE_APPROX
*int_result = 1;
#else
*int_result = 0;
#endif
return REG_OK;
case TRE_CONFIG_WCHAR:
#ifdef TRE_WCHAR
*int_result = 1;
#else
*int_result = 0;
#endif
return REG_OK;
case TRE_CONFIG_MULTIBYTE:
#ifdef TRE_MULTIBYTE
*int_result = 1;
#else
*int_result = 0;
#endif
return REG_OK;
case TRE_CONFIG_SYSTEM_ABI:
#ifdef TRE_CONFIG_SYSTEM_ABI
*int_result = 1;
#else
*int_result = 0;
#endif
return REG_OK;
case TRE_CONFIG_VERSION:
*string_result = TRE_VERSION;
return REG_OK;
}
return REG_NOMATCH;
}
#endif