#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
#ifdef STDC_HEADERS
#include <stdlib.h>
#else
extern char *calloc(), *malloc(), *realloc();
extern void free();
#endif
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#include <string.h>
#undef index
#define index strchr
#else
#include <strings.h>
#endif
#ifndef DEBUG
#undef assert
#define assert(e)
#endif
#ifndef isgraph
#define isgraph(C) (isprint(C) && !isspace(C))
#endif
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
#define ISALPHA(C) isalpha(C)
#define ISUPPER(C) isupper(C)
#define ISLOWER(C) islower(C)
#define ISDIGIT(C) isdigit(C)
#define ISXDIGIT(C) isxdigit(C)
#define ISSPACE(C) isspace(C)
#define ISPUNCT(C) ispunct(C)
#define ISALNUM(C) isalnum(C)
#define ISPRINT(C) isprint(C)
#define ISGRAPH(C) isgraph(C)
#define ISCNTRL(C) iscntrl(C)
#else
#define ISALPHA(C) (isascii(C) && isalpha(C))
#define ISUPPER(C) (isascii(C) && isupper(C))
#define ISLOWER(C) (isascii(C) && islower(C))
#define ISDIGIT(C) (isascii(C) && isdigit(C))
#define ISXDIGIT(C) (isascii(C) && isxdigit(C))
#define ISSPACE(C) (isascii(C) && isspace(C))
#define ISPUNCT(C) (isascii(C) && ispunct(C))
#define ISALNUM(C) (isascii(C) && isalnum(C))
#define ISPRINT(C) (isascii(C) && isprint(C))
#define ISGRAPH(C) (isascii(C) && isgraph(C))
#define ISCNTRL(C) (isascii(C) && iscntrl(C))
#endif
#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
#ifndef _
# ifdef HAVE_LIBINTL_H
# include <libintl.h>
# ifndef _
# define _(Str) gettext (Str)
# endif
# else
# define _(Str) (Str)
# endif
#endif
#include "regex.h"
#include "dfa.h"
#ifdef setbit
# undef setbit
#endif
#ifdef clrbit
# undef clrbit
#endif
static void dfamust PARAMS ((struct dfa *dfa));
static ptr_t xcalloc PARAMS ((size_t n, size_t s));
static ptr_t xmalloc PARAMS ((size_t n));
static ptr_t xrealloc PARAMS ((ptr_t p, size_t n));
#ifdef DEBUG
static void prtok PARAMS ((token t));
#endif
static int tstbit PARAMS ((int b, charclass c));
static void setbit PARAMS ((int b, charclass c));
static void clrbit PARAMS ((int b, charclass c));
static void copyset PARAMS ((charclass src, charclass dst));
static void zeroset PARAMS ((charclass s));
static void notset PARAMS ((charclass s));
static int equal PARAMS ((charclass s1, charclass s2));
static int charclass_index PARAMS ((charclass s));
static int looking_at PARAMS ((const char *s));
static token lex PARAMS ((void));
static void addtok PARAMS ((token t));
static void atom PARAMS ((void));
static int nsubtoks PARAMS ((int tindex));
static void copytoks PARAMS ((int tindex, int ntokens));
static void closure PARAMS ((void));
static void branch PARAMS ((void));
static void regexp PARAMS ((int toplevel));
static void copy PARAMS ((position_set *src, position_set *dst));
static void insert PARAMS ((position p, position_set *s));
static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m));
static void delete PARAMS ((position p, position_set *s));
static int state_index PARAMS ((struct dfa *d, position_set *s,
int newline, int letter));
static void build_state PARAMS ((int s, struct dfa *d));
static void build_state_zero PARAMS ((struct dfa *d));
static char *icatalloc PARAMS ((char *old, char *new));
static char *icpyalloc PARAMS ((char *string));
static char *istrstr PARAMS ((char *lookin, char *lookfor));
static void ifree PARAMS ((char *cp));
static void freelist PARAMS ((char **cpp));
static char **enlist PARAMS ((char **cpp, char *new, size_t len));
static char **comsubs PARAMS ((char *left, char *right));
static char **addlists PARAMS ((char **old, char **new));
static char **inboth PARAMS ((char **left, char **right));
static ptr_t
xcalloc (size_t n, size_t s)
{
ptr_t r = calloc(n, s);
if (!r)
dfaerror(_("Memory exhausted"));
return r;
}
static ptr_t
xmalloc (size_t n)
{
ptr_t r = malloc(n);
assert(n != 0);
if (!r)
dfaerror(_("Memory exhausted"));
return r;
}
static ptr_t
xrealloc (ptr_t p, size_t n)
{
ptr_t r = realloc(p, n);
assert(n != 0);
if (!r)
dfaerror(_("Memory exhausted"));
return r;
}
#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t)))
#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
if ((index) >= (nalloc)) \
{ \
while ((index) >= (nalloc)) \
(nalloc) *= 2; \
REALLOC(p, t, nalloc); \
}
#ifdef DEBUG
static void
prtok (token t)
{
char *s;
if (t < 0)
fprintf(stderr, "END");
else if (t < NOTCHAR)
fprintf(stderr, "%c", t);
else
{
switch (t)
{
case EMPTY: s = "EMPTY"; break;
case BACKREF: s = "BACKREF"; break;
case BEGLINE: s = "BEGLINE"; break;
case ENDLINE: s = "ENDLINE"; break;
case BEGWORD: s = "BEGWORD"; break;
case ENDWORD: s = "ENDWORD"; break;
case LIMWORD: s = "LIMWORD"; break;
case NOTLIMWORD: s = "NOTLIMWORD"; break;
case QMARK: s = "QMARK"; break;
case STAR: s = "STAR"; break;
case PLUS: s = "PLUS"; break;
case CAT: s = "CAT"; break;
case OR: s = "OR"; break;
case ORTOP: s = "ORTOP"; break;
case LPAREN: s = "LPAREN"; break;
case RPAREN: s = "RPAREN"; break;
default: s = "CSET"; break;
}
fprintf(stderr, "%s", s);
}
}
#endif
static int
tstbit (int b, charclass c)
{
return c[b / INTBITS] & 1 << b % INTBITS;
}
static void
setbit (int b, charclass c)
{
c[b / INTBITS] |= 1 << b % INTBITS;
}
static void
clrbit (int b, charclass c)
{
c[b / INTBITS] &= ~(1 << b % INTBITS);
}
static void
copyset (charclass src, charclass dst)
{
int i;
for (i = 0; i < CHARCLASS_INTS; ++i)
dst[i] = src[i];
}
static void
zeroset (charclass s)
{
int i;
for (i = 0; i < CHARCLASS_INTS; ++i)
s[i] = 0;
}
static void
notset (charclass s)
{
int i;
for (i = 0; i < CHARCLASS_INTS; ++i)
s[i] = ~s[i];
}
static int
equal (charclass s1, charclass s2)
{
int i;
for (i = 0; i < CHARCLASS_INTS; ++i)
if (s1[i] != s2[i])
return 0;
return 1;
}
static struct dfa *dfa;
static int
charclass_index (charclass s)
{
int i;
for (i = 0; i < dfa->cindex; ++i)
if (equal(s, dfa->charclasses[i]))
return i;
REALLOC_IF_NECESSARY(dfa->charclasses, charclass, dfa->calloc, dfa->cindex);
++dfa->cindex;
copyset(s, dfa->charclasses[i]);
return i;
}
static reg_syntax_t syntax_bits, syntax_bits_set;
static int case_fold;
static unsigned char eolbyte;
void
dfasyntax (reg_syntax_t bits, int fold, int eol)
{
syntax_bits_set = 1;
syntax_bits = bits;
case_fold = fold;
eolbyte = eol;
}
static char *lexstart;
static char *lexptr;
static int lexleft;
static token lasttok;
static int laststart;
static int parens;
static int minrep, maxrep;
#define FETCH(c, eoferr) \
{ \
if (! lexleft) \
{ \
if (eoferr != 0) \
dfaerror (eoferr); \
else \
return lasttok = END; \
} \
(c) = (unsigned char) *lexptr++; \
--lexleft; \
}
#ifdef __STDC__
#define FUNC(F, P) static int F(int c) { return P(c); }
#else
#define FUNC(F, P) static int F(c) int c; { return P(c); }
#endif
FUNC(is_alpha, ISALPHA)
FUNC(is_upper, ISUPPER)
FUNC(is_lower, ISLOWER)
FUNC(is_digit, ISDIGIT)
FUNC(is_xdigit, ISXDIGIT)
FUNC(is_space, ISSPACE)
FUNC(is_punct, ISPUNCT)
FUNC(is_alnum, ISALNUM)
FUNC(is_print, ISPRINT)
FUNC(is_graph, ISGRAPH)
FUNC(is_cntrl, ISCNTRL)
static int
is_blank (int c)
{
return (c == ' ' || c == '\t');
}
static struct {
const char *name;
int (*pred) PARAMS ((int));
} prednames[] = {
{ ":alpha:]", is_alpha },
{ ":upper:]", is_upper },
{ ":lower:]", is_lower },
{ ":digit:]", is_digit },
{ ":xdigit:]", is_xdigit },
{ ":space:]", is_space },
{ ":punct:]", is_punct },
{ ":alnum:]", is_alnum },
{ ":print:]", is_print },
{ ":graph:]", is_graph },
{ ":cntrl:]", is_cntrl },
{ ":blank:]", is_blank },
{ 0 }
};
#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
static int
looking_at (char const *s)
{
size_t len;
len = strlen(s);
if (lexleft < len)
return 0;
return strncmp(s, lexptr, len) == 0;
}
static token
lex (void)
{
token c, c1, c2;
int backslash = 0, invert;
charclass ccl;
int i;
char lo[2];
char hi[2];
for (i = 0; i < 2; ++i)
{
FETCH(c, 0);
switch (c)
{
case '\\':
if (backslash)
goto normal_char;
if (lexleft == 0)
dfaerror(_("Unfinished \\ escape"));
backslash = 1;
break;
case '^':
if (backslash)
goto normal_char;
if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
|| lasttok == END
|| lasttok == LPAREN
|| lasttok == OR)
return lasttok = BEGLINE;
goto normal_char;
case '$':
if (backslash)
goto normal_char;
if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
|| lexleft == 0
|| (syntax_bits & RE_NO_BK_PARENS
? lexleft > 0 && *lexptr == ')'
: lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')')
|| (syntax_bits & RE_NO_BK_VBAR
? lexleft > 0 && *lexptr == '|'
: lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|')
|| ((syntax_bits & RE_NEWLINE_ALT)
&& lexleft > 0 && *lexptr == '\n'))
return lasttok = ENDLINE;
goto normal_char;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
if (backslash && !(syntax_bits & RE_NO_BK_REFS))
{
laststart = 0;
return lasttok = BACKREF;
}
goto normal_char;
case '`':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = BEGLINE;
goto normal_char;
case '\'':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = ENDLINE;
goto normal_char;
case '<':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = BEGWORD;
goto normal_char;
case '>':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = ENDWORD;
goto normal_char;
case 'b':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = LIMWORD;
goto normal_char;
case 'B':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = NOTLIMWORD;
goto normal_char;
case '?':
if (syntax_bits & RE_LIMITED_OPS)
goto normal_char;
if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
goto normal_char;
if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
goto normal_char;
return lasttok = QMARK;
case '*':
if (backslash)
goto normal_char;
if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
goto normal_char;
return lasttok = STAR;
case '+':
if (syntax_bits & RE_LIMITED_OPS)
goto normal_char;
if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
goto normal_char;
if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
goto normal_char;
return lasttok = PLUS;
case '{':
if (!(syntax_bits & RE_INTERVALS))
goto normal_char;
if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
goto normal_char;
if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
goto normal_char;
if (syntax_bits & RE_NO_BK_BRACES)
{
int lo = -1, hi = -1;
char const *p = lexptr;
char const *lim = p + lexleft;
for (; p != lim && ISASCIIDIGIT (*p); p++)
lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
if (p != lim && *p == ',')
while (++p != lim && ISASCIIDIGIT (*p))
hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
else
hi = lo;
if (p == lim || *p != '}'
|| lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
goto normal_char;
}
minrep = 0;
FETCH(c, _("unfinished repeat count"));
if (ISASCIIDIGIT (c))
{
minrep = c - '0';
for (;;)
{
FETCH(c, _("unfinished repeat count"));
if (! ISASCIIDIGIT (c))
break;
minrep = 10 * minrep + c - '0';
}
}
else
dfaerror(_("malformed repeat count"));
if (c == ',')
{
FETCH (c, _("unfinished repeat count"));
if (! ISASCIIDIGIT (c))
maxrep = -1;
else
{
maxrep = c - '0';
for (;;)
{
FETCH (c, _("unfinished repeat count"));
if (! ISASCIIDIGIT (c))
break;
maxrep = 10 * maxrep + c - '0';
}
if (0 <= maxrep && maxrep < minrep)
dfaerror (_("malformed repeat count"));
}
}
else
maxrep = minrep;
if (!(syntax_bits & RE_NO_BK_BRACES))
{
if (c != '\\')
dfaerror(_("malformed repeat count"));
FETCH(c, _("unfinished repeat count"));
}
if (c != '}')
dfaerror(_("malformed repeat count"));
laststart = 0;
return lasttok = REPMN;
case '|':
if (syntax_bits & RE_LIMITED_OPS)
goto normal_char;
if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
goto normal_char;
laststart = 1;
return lasttok = OR;
case '\n':
if (syntax_bits & RE_LIMITED_OPS
|| backslash
|| !(syntax_bits & RE_NEWLINE_ALT))
goto normal_char;
laststart = 1;
return lasttok = OR;
case '(':
if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
goto normal_char;
++parens;
laststart = 1;
return lasttok = LPAREN;
case ')':
if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
goto normal_char;
if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_char;
--parens;
laststart = 0;
return lasttok = RPAREN;
case '.':
if (backslash)
goto normal_char;
zeroset(ccl);
notset(ccl);
if (!(syntax_bits & RE_DOT_NEWLINE))
clrbit(eolbyte, ccl);
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit('\0', ccl);
laststart = 0;
return lasttok = CSET + charclass_index(ccl);
case 'w':
case 'W':
if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
zeroset(ccl);
for (c2 = 0; c2 < NOTCHAR; ++c2)
if (IS_WORD_CONSTITUENT(c2))
setbit(c2, ccl);
if (c == 'W')
notset(ccl);
laststart = 0;
return lasttok = CSET + charclass_index(ccl);
case '[':
if (backslash)
goto normal_char;
zeroset(ccl);
FETCH(c, _("Unbalanced ["));
if (c == '^')
{
FETCH(c, _("Unbalanced ["));
invert = 1;
}
else
invert = 0;
do
{
if (c == '[' && (syntax_bits & RE_CHAR_CLASSES))
for (c1 = 0; prednames[c1].name; ++c1)
if (looking_at(prednames[c1].name))
{
int (*pred)() = prednames[c1].pred;
if (case_fold
&& (pred == is_upper || pred == is_lower))
pred = is_alpha;
for (c2 = 0; c2 < NOTCHAR; ++c2)
if ((*pred)(c2))
setbit(c2, ccl);
lexptr += strlen(prednames[c1].name);
lexleft -= strlen(prednames[c1].name);
FETCH(c1, _("Unbalanced ["));
goto skip;
}
if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
FETCH(c, _("Unbalanced ["));
FETCH(c1, _("Unbalanced ["));
if (c1 == '-')
{
FETCH(c2, _("Unbalanced ["));
if (c2 == ']')
{
--lexptr;
++lexleft;
c2 = c;
}
else
{
if (c2 == '\\'
&& (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
FETCH(c2, _("Unbalanced ["));
FETCH(c1, _("Unbalanced ["));
}
}
else
c2 = c;
lo[0] = c; lo[1] = '\0';
hi[0] = c2; hi[1] = '\0';
for (c = 0; c < NOTCHAR; c++)
{
char ch[2];
ch[0] = c; ch[1] = '\0';
if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
{
setbit (c, ccl);
if (case_fold)
{
if (ISUPPER (c))
setbit (tolower (c), ccl);
else if (ISLOWER (c))
setbit (toupper (c), ccl);
}
}
}
skip:
;
}
while ((c = c1) != ']');
if (invert)
{
notset(ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit(eolbyte, ccl);
}
laststart = 0;
return lasttok = CSET + charclass_index(ccl);
default:
normal_char:
laststart = 0;
if (case_fold && ISALPHA(c))
{
zeroset(ccl);
setbit(c, ccl);
if (isupper(c))
setbit(tolower(c), ccl);
else
setbit(toupper(c), ccl);
return lasttok = CSET + charclass_index(ccl);
}
return c;
}
}
abort();
return END;
}
static token tok;
static int depth;
static void
addtok (token t)
{
REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
dfa->tokens[dfa->tindex++] = t;
switch (t)
{
case QMARK:
case STAR:
case PLUS:
break;
case CAT:
case OR:
case ORTOP:
--depth;
break;
default:
++dfa->nleaves;
case EMPTY:
++depth;
break;
}
if (depth > dfa->depth)
dfa->depth = depth;
}
static void
atom (void)
{
if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
|| tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
addtok(tok);
tok = lex();
}
else if (tok == LPAREN)
{
tok = lex();
regexp(0);
if (tok != RPAREN)
dfaerror(_("Unbalanced ("));
tok = lex();
}
else
addtok(EMPTY);
}
static int
nsubtoks (int tindex)
{
int ntoks1;
switch (dfa->tokens[tindex - 1])
{
default:
return 1;
case QMARK:
case STAR:
case PLUS:
return 1 + nsubtoks(tindex - 1);
case CAT:
case OR:
case ORTOP:
ntoks1 = nsubtoks(tindex - 1);
return 1 + ntoks1 + nsubtoks(tindex - 1 - ntoks1);
}
}
static void
copytoks (int tindex, int ntokens)
{
int i;
for (i = 0; i < ntokens; ++i)
addtok(dfa->tokens[tindex + i]);
}
static void
closure (void)
{
int tindex, ntokens, i;
atom();
while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
if (tok == REPMN)
{
ntokens = nsubtoks(dfa->tindex);
tindex = dfa->tindex - ntokens;
if (maxrep < 0)
addtok(PLUS);
if (minrep == 0)
addtok(QMARK);
for (i = 1; i < minrep; ++i)
{
copytoks(tindex, ntokens);
addtok(CAT);
}
for (; i < maxrep; ++i)
{
copytoks(tindex, ntokens);
addtok(QMARK);
addtok(CAT);
}
tok = lex();
}
else
{
addtok(tok);
tok = lex();
}
}
static void
branch (void)
{
closure();
while (tok != RPAREN && tok != OR && tok >= 0)
{
closure();
addtok(CAT);
}
}
static void
regexp (int toplevel)
{
branch();
while (tok == OR)
{
tok = lex();
branch();
if (toplevel)
addtok(ORTOP);
else
addtok(OR);
}
}
void
dfaparse (char *s, size_t len, struct dfa *d)
{
dfa = d;
lexstart = lexptr = s;
lexleft = len;
lasttok = END;
laststart = 1;
parens = 0;
if (! syntax_bits_set)
dfaerror(_("No syntax specified"));
tok = lex();
depth = d->depth;
regexp(1);
if (tok != END)
dfaerror(_("Unbalanced )"));
addtok(END - d->nregexps);
addtok(CAT);
if (d->nregexps)
addtok(ORTOP);
++d->nregexps;
}
static void
copy (position_set *src, position_set *dst)
{
int i;
for (i = 0; i < src->nelem; ++i)
dst->elems[i] = src->elems[i];
dst->nelem = src->nelem;
}
static void
insert (position p, position_set *s)
{
int i;
position t1, t2;
for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i)
continue;
if (i < s->nelem && p.index == s->elems[i].index)
s->elems[i].constraint |= p.constraint;
else
{
t1 = p;
++s->nelem;
while (i < s->nelem)
{
t2 = s->elems[i];
s->elems[i++] = t1;
t1 = t2;
}
}
}
static void
merge (position_set *s1, position_set *s2, position_set *m)
{
int i = 0, j = 0;
m->nelem = 0;
while (i < s1->nelem && j < s2->nelem)
if (s1->elems[i].index > s2->elems[j].index)
m->elems[m->nelem++] = s1->elems[i++];
else if (s1->elems[i].index < s2->elems[j].index)
m->elems[m->nelem++] = s2->elems[j++];
else
{
m->elems[m->nelem] = s1->elems[i++];
m->elems[m->nelem++].constraint |= s2->elems[j++].constraint;
}
while (i < s1->nelem)
m->elems[m->nelem++] = s1->elems[i++];
while (j < s2->nelem)
m->elems[m->nelem++] = s2->elems[j++];
}
static void
delete (position p, position_set *s)
{
int i;
for (i = 0; i < s->nelem; ++i)
if (p.index == s->elems[i].index)
break;
if (i < s->nelem)
for (--s->nelem; i < s->nelem; ++i)
s->elems[i] = s->elems[i + 1];
}
static int
state_index (struct dfa *d, position_set *s, int newline, int letter)
{
int hash = 0;
int constraint;
int i, j;
newline = newline ? 1 : 0;
letter = letter ? 1 : 0;
for (i = 0; i < s->nelem; ++i)
hash ^= s->elems[i].index + s->elems[i].constraint;
for (i = 0; i < d->sindex; ++i)
{
if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
|| newline != d->states[i].newline || letter != d->states[i].letter)
continue;
for (j = 0; j < s->nelem; ++j)
if (s->elems[j].constraint
!= d->states[i].elems.elems[j].constraint
|| s->elems[j].index != d->states[i].elems.elems[j].index)
break;
if (j == s->nelem)
return i;
}
REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex);
d->states[i].hash = hash;
MALLOC(d->states[i].elems.elems, position, s->nelem);
copy(s, &d->states[i].elems);
d->states[i].newline = newline;
d->states[i].letter = letter;
d->states[i].backref = 0;
d->states[i].constraint = 0;
d->states[i].first_end = 0;
for (j = 0; j < s->nelem; ++j)
if (d->tokens[s->elems[j].index] < 0)
{
constraint = s->elems[j].constraint;
if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
|| SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
|| SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
|| SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))
d->states[i].constraint |= constraint;
if (! d->states[i].first_end)
d->states[i].first_end = d->tokens[s->elems[j].index];
}
else if (d->tokens[s->elems[j].index] == BACKREF)
{
d->states[i].constraint = NO_CONSTRAINT;
d->states[i].backref = 1;
}
++d->sindex;
return i;
}
static void
epsclosure (position_set *s, struct dfa *d)
{
int i, j;
int *visited;
position p, old;
MALLOC(visited, int, d->tindex);
for (i = 0; i < d->tindex; ++i)
visited[i] = 0;
for (i = 0; i < s->nelem; ++i)
if (d->tokens[s->elems[i].index] >= NOTCHAR
&& d->tokens[s->elems[i].index] != BACKREF
&& d->tokens[s->elems[i].index] < CSET)
{
old = s->elems[i];
p.constraint = old.constraint;
delete(s->elems[i], s);
if (visited[old.index])
{
--i;
continue;
}
visited[old.index] = 1;
switch (d->tokens[old.index])
{
case BEGLINE:
p.constraint &= BEGLINE_CONSTRAINT;
break;
case ENDLINE:
p.constraint &= ENDLINE_CONSTRAINT;
break;
case BEGWORD:
p.constraint &= BEGWORD_CONSTRAINT;
break;
case ENDWORD:
p.constraint &= ENDWORD_CONSTRAINT;
break;
case LIMWORD:
p.constraint &= LIMWORD_CONSTRAINT;
break;
case NOTLIMWORD:
p.constraint &= NOTLIMWORD_CONSTRAINT;
break;
default:
break;
}
for (j = 0; j < d->follows[old.index].nelem; ++j)
{
p.index = d->follows[old.index].elems[j].index;
insert(p, s);
}
i = -1;
}
free(visited);
}
void
dfaanalyze (struct dfa *d, int searchflag)
{
int *nullable;
int *nfirstpos;
position *firstpos;
int *nlastpos;
position *lastpos;
int *nalloc;
position_set tmp;
position_set merged;
int wants_newline;
int *o_nullable;
int *o_nfirst, *o_nlast;
position *o_firstpos, *o_lastpos;
int i, j;
position *pos;
#ifdef DEBUG
fprintf(stderr, "dfaanalyze:\n");
for (i = 0; i < d->tindex; ++i)
{
fprintf(stderr, " %d:", i);
prtok(d->tokens[i]);
}
putc('\n', stderr);
#endif
d->searchflag = searchflag;
MALLOC(nullable, int, d->depth);
o_nullable = nullable;
MALLOC(nfirstpos, int, d->depth);
o_nfirst = nfirstpos;
MALLOC(firstpos, position, d->nleaves);
o_firstpos = firstpos, firstpos += d->nleaves;
MALLOC(nlastpos, int, d->depth);
o_nlast = nlastpos;
MALLOC(lastpos, position, d->nleaves);
o_lastpos = lastpos, lastpos += d->nleaves;
MALLOC(nalloc, int, d->tindex);
for (i = 0; i < d->tindex; ++i)
nalloc[i] = 0;
MALLOC(merged.elems, position, d->nleaves);
CALLOC(d->follows, position_set, d->tindex);
for (i = 0; i < d->tindex; ++i)
#ifdef DEBUG
{
#endif
switch (d->tokens[i])
{
case EMPTY:
*nullable++ = 1;
*nfirstpos++ = *nlastpos++ = 0;
break;
case STAR:
case PLUS:
tmp.nelem = nfirstpos[-1];
tmp.elems = firstpos;
pos = lastpos;
for (j = 0; j < nlastpos[-1]; ++j)
{
merge(&tmp, &d->follows[pos[j].index], &merged);
REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
nalloc[pos[j].index], merged.nelem - 1);
copy(&merged, &d->follows[pos[j].index]);
}
case QMARK:
if (d->tokens[i] != PLUS)
nullable[-1] = 1;
break;
case CAT:
tmp.nelem = nfirstpos[-1];
tmp.elems = firstpos;
pos = lastpos + nlastpos[-1];
for (j = 0; j < nlastpos[-2]; ++j)
{
merge(&tmp, &d->follows[pos[j].index], &merged);
REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
nalloc[pos[j].index], merged.nelem - 1);
copy(&merged, &d->follows[pos[j].index]);
}
if (nullable[-2])
nfirstpos[-2] += nfirstpos[-1];
else
firstpos += nfirstpos[-1];
--nfirstpos;
if (nullable[-1])
nlastpos[-2] += nlastpos[-1];
else
{
pos = lastpos + nlastpos[-2];
for (j = nlastpos[-1] - 1; j >= 0; --j)
pos[j] = lastpos[j];
lastpos += nlastpos[-2];
nlastpos[-2] = nlastpos[-1];
}
--nlastpos;
nullable[-2] = nullable[-1] && nullable[-2];
--nullable;
break;
case OR:
case ORTOP:
nfirstpos[-2] += nfirstpos[-1];
--nfirstpos;
nlastpos[-2] += nlastpos[-1];
--nlastpos;
nullable[-2] = nullable[-1] || nullable[-2];
--nullable;
break;
default:
*nullable++ = d->tokens[i] == BACKREF;
*nfirstpos++ = *nlastpos++ = 1;
--firstpos, --lastpos;
firstpos->index = lastpos->index = i;
firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
nalloc[i] = 1;
MALLOC(d->follows[i].elems, position, nalloc[i]);
break;
}
#ifdef DEBUG
fprintf(stderr, "node %d:", i);
prtok(d->tokens[i]);
putc('\n', stderr);
fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
fprintf(stderr, " firstpos:");
for (j = nfirstpos[-1] - 1; j >= 0; --j)
{
fprintf(stderr, " %d:", firstpos[j].index);
prtok(d->tokens[firstpos[j].index]);
}
fprintf(stderr, "\n lastpos:");
for (j = nlastpos[-1] - 1; j >= 0; --j)
{
fprintf(stderr, " %d:", lastpos[j].index);
prtok(d->tokens[lastpos[j].index]);
}
putc('\n', stderr);
}
#endif
for (i = 0; i < d->tindex; ++i)
if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
|| d->tokens[i] >= CSET)
{
#ifdef DEBUG
fprintf(stderr, "follows(%d:", i);
prtok(d->tokens[i]);
fprintf(stderr, "):");
for (j = d->follows[i].nelem - 1; j >= 0; --j)
{
fprintf(stderr, " %d:", d->follows[i].elems[j].index);
prtok(d->tokens[d->follows[i].elems[j].index]);
}
putc('\n', stderr);
#endif
copy(&d->follows[i], &merged);
epsclosure(&merged, d);
if (d->follows[i].nelem < merged.nelem)
REALLOC(d->follows[i].elems, position, merged.nelem);
copy(&merged, &d->follows[i]);
}
merged.nelem = 0;
for (i = 0; i < nfirstpos[-1]; ++i)
insert(firstpos[i], &merged);
epsclosure(&merged, d);
wants_newline = 0;
for (i = 0; i < merged.nelem; ++i)
if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
wants_newline = 1;
d->salloc = 1;
d->sindex = 0;
MALLOC(d->states, dfa_state, d->salloc);
state_index(d, &merged, wants_newline, 0);
free(o_nullable);
free(o_nfirst);
free(o_firstpos);
free(o_nlast);
free(o_lastpos);
free(nalloc);
free(merged.elems);
}
void
dfastate (int s, struct dfa *d, int trans[])
{
position_set grps[NOTCHAR];
charclass labels[NOTCHAR];
int ngrps = 0;
position pos;
charclass matches;
int matchesf;
charclass intersect;
int intersectf;
charclass leftovers;
int leftoversf;
static charclass letters;
static charclass newline;
position_set follows;
position_set tmp;
int state;
int wants_newline;
int state_newline;
int wants_letter;
int state_letter;
static int initialized;
int i, j, k;
if (! initialized)
{
initialized = 1;
for (i = 0; i < NOTCHAR; ++i)
if (IS_WORD_CONSTITUENT(i))
setbit(i, letters);
setbit(eolbyte, newline);
}
zeroset(matches);
for (i = 0; i < d->states[s].elems.nelem; ++i)
{
pos = d->states[s].elems.elems[i];
if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
setbit(d->tokens[pos.index], matches);
else if (d->tokens[pos.index] >= CSET)
copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
else
continue;
if (pos.constraint != 0xFF)
{
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
d->states[s].newline, 1))
clrbit(eolbyte, matches);
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
d->states[s].newline, 0))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= newline[j];
if (! MATCHES_LETTER_CONTEXT(pos.constraint,
d->states[s].letter, 1))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= ~letters[j];
if (! MATCHES_LETTER_CONTEXT(pos.constraint,
d->states[s].letter, 0))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= letters[j];
for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
continue;
if (j == CHARCLASS_INTS)
continue;
}
for (j = 0; j < ngrps; ++j)
{
if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
&& !tstbit(d->tokens[pos.index], labels[j]))
continue;
intersectf = 0;
for (k = 0; k < CHARCLASS_INTS; ++k)
(intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
if (! intersectf)
continue;
leftoversf = matchesf = 0;
for (k = 0; k < CHARCLASS_INTS; ++k)
{
int match = matches[k], label = labels[j][k];
(leftovers[k] = ~match & label) ? (leftoversf = 1) : 0;
(matches[k] = match & ~label) ? (matchesf = 1) : 0;
}
if (leftoversf)
{
copyset(leftovers, labels[ngrps]);
copyset(intersect, labels[j]);
MALLOC(grps[ngrps].elems, position, d->nleaves);
copy(&grps[j], &grps[ngrps]);
++ngrps;
}
grps[j].elems[grps[j].nelem++] = pos;
if (! matchesf)
break;
}
if (j == ngrps)
{
copyset(matches, labels[ngrps]);
zeroset(matches);
MALLOC(grps[ngrps].elems, position, d->nleaves);
grps[ngrps].nelem = 1;
grps[ngrps].elems[0] = pos;
++ngrps;
}
}
MALLOC(follows.elems, position, d->nleaves);
MALLOC(tmp.elems, position, d->nleaves);
if (d->searchflag)
{
wants_newline = 0;
wants_letter = 0;
for (i = 0; i < d->states[0].elems.nelem; ++i)
{
if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint))
wants_newline = 1;
if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint))
wants_letter = 1;
}
copy(&d->states[0].elems, &follows);
state = state_index(d, &follows, 0, 0);
if (wants_newline)
state_newline = state_index(d, &follows, 1, 0);
else
state_newline = state;
if (wants_letter)
state_letter = state_index(d, &follows, 0, 1);
else
state_letter = state;
for (i = 0; i < NOTCHAR; ++i)
trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
trans[eolbyte] = state_newline;
}
else
for (i = 0; i < NOTCHAR; ++i)
trans[i] = -1;
for (i = 0; i < ngrps; ++i)
{
follows.nelem = 0;
for (j = 0; j < grps[i].nelem; ++j)
for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
if (d->searchflag)
for (j = 0; j < d->states[0].elems.nelem; ++j)
insert(d->states[0].elems.elems[j], &follows);
wants_newline = 0;
if (tstbit(eolbyte, labels[i]))
for (j = 0; j < follows.nelem; ++j)
if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
wants_newline = 1;
wants_letter = 0;
for (j = 0; j < CHARCLASS_INTS; ++j)
if (labels[i][j] & letters[j])
break;
if (j < CHARCLASS_INTS)
for (j = 0; j < follows.nelem; ++j)
if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
wants_letter = 1;
state = state_index(d, &follows, 0, 0);
if (wants_newline)
state_newline = state_index(d, &follows, 1, 0);
else
state_newline = state;
if (wants_letter)
state_letter = state_index(d, &follows, 0, 1);
else
state_letter = state;
for (j = 0; j < CHARCLASS_INTS; ++j)
for (k = 0; k < INTBITS; ++k)
if (labels[i][j] & 1 << k)
{
int c = j * INTBITS + k;
if (c == eolbyte)
trans[c] = state_newline;
else if (IS_WORD_CONSTITUENT(c))
trans[c] = state_letter;
else if (c < NOTCHAR)
trans[c] = state;
}
}
for (i = 0; i < ngrps; ++i)
free(grps[i].elems);
free(follows.elems);
free(tmp.elems);
}
static void
build_state (int s, struct dfa *d)
{
int *trans;
int i;
if (d->trcount >= 1024)
{
for (i = 0; i < d->tralloc; ++i)
if (d->trans[i])
{
free((ptr_t) d->trans[i]);
d->trans[i] = NULL;
}
else if (d->fails[i])
{
free((ptr_t) d->fails[i]);
d->fails[i] = NULL;
}
d->trcount = 0;
}
++d->trcount;
d->success[s] = 0;
if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0,
s, *d))
d->success[s] |= 4;
if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1,
s, *d))
d->success[s] |= 2;
if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0,
s, *d))
d->success[s] |= 1;
MALLOC(trans, int, NOTCHAR);
dfastate(s, d, trans);
for (i = 0; i < NOTCHAR; ++i)
if (trans[i] >= d->tralloc)
{
int oldalloc = d->tralloc;
while (trans[i] >= d->tralloc)
d->tralloc *= 2;
REALLOC(d->realtrans, int *, d->tralloc + 1);
d->trans = d->realtrans + 1;
REALLOC(d->fails, int *, d->tralloc);
REALLOC(d->success, int, d->tralloc);
REALLOC(d->newlines, int, d->tralloc);
while (oldalloc < d->tralloc)
{
d->trans[oldalloc] = NULL;
d->fails[oldalloc++] = NULL;
}
}
d->newlines[s] = trans[eolbyte];
trans[eolbyte] = -1;
if (ACCEPTING(s, *d))
d->fails[s] = trans;
else
d->trans[s] = trans;
}
static void
build_state_zero (struct dfa *d)
{
d->tralloc = 1;
d->trcount = 0;
CALLOC(d->realtrans, int *, d->tralloc + 1);
d->trans = d->realtrans + 1;
CALLOC(d->fails, int *, d->tralloc);
MALLOC(d->success, int, d->tralloc);
MALLOC(d->newlines, int, d->tralloc);
build_state(0, d);
}
char *
dfaexec (struct dfa *d, char *begin, char *end,
int newline, int *count, int *backref)
{
register int s, s1, tmp;
register unsigned char *p;
register int **trans, *t;
register unsigned char eol = eolbyte;
static int sbit[NOTCHAR];
static int sbit_init;
if (! sbit_init)
{
int i;
sbit_init = 1;
for (i = 0; i < NOTCHAR; ++i)
sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
sbit[eol] = 4;
}
if (! d->tralloc)
build_state_zero(d);
s = s1 = 0;
p = (unsigned char *) begin;
trans = d->trans;
*end = eol;
for (;;)
{
while ((t = trans[s]) != 0) {
s1 = t[*p++];
if ((t = trans[s1]) == 0) {
tmp = s ; s = s1 ; s1 = tmp ;
break;
}
s = t[*p++];
}
if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
{
if (d->success[s] & sbit[*p])
{
if (backref)
*backref = (d->states[s].backref != 0);
return (char *) p;
}
s1 = s;
s = d->fails[s][*p++];
continue;
}
if (count && (char *) p <= end && p[-1] == eol)
++*count;
if ((char *) p > end)
return NULL;
if (s >= 0)
{
build_state(s, d);
trans = d->trans;
continue;
}
if (p[-1] == eol && newline)
{
s = d->newlines[s1];
continue;
}
s = 0;
}
}
void
dfainit (struct dfa *d)
{
d->calloc = 1;
MALLOC(d->charclasses, charclass, d->calloc);
d->cindex = 0;
d->talloc = 1;
MALLOC(d->tokens, token, d->talloc);
d->tindex = d->depth = d->nleaves = d->nregexps = 0;
d->searchflag = 0;
d->tralloc = 0;
d->musts = 0;
}
void
dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
{
if (case_fold)
{
char *lcopy;
int i;
lcopy = malloc(len);
if (!lcopy)
dfaerror(_("out of memory"));
case_fold = 0;
for (i = 0; i < len; ++i)
if (ISUPPER ((unsigned char) s[i]))
lcopy[i] = tolower ((unsigned char) s[i]);
else
lcopy[i] = s[i];
dfainit(d);
dfaparse(lcopy, len, d);
free(lcopy);
dfamust(d);
d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
case_fold = 1;
dfaparse(s, len, d);
dfaanalyze(d, searchflag);
}
else
{
dfainit(d);
dfaparse(s, len, d);
dfamust(d);
dfaanalyze(d, searchflag);
}
}
void
dfafree (struct dfa *d)
{
int i;
struct dfamust *dm, *ndm;
free((ptr_t) d->charclasses);
free((ptr_t) d->tokens);
for (i = 0; i < d->sindex; ++i)
free((ptr_t) d->states[i].elems.elems);
free((ptr_t) d->states);
for (i = 0; i < d->tindex; ++i)
if (d->follows[i].elems)
free((ptr_t) d->follows[i].elems);
free((ptr_t) d->follows);
for (i = 0; i < d->tralloc; ++i)
if (d->trans[i])
free((ptr_t) d->trans[i]);
else if (d->fails[i])
free((ptr_t) d->fails[i]);
if (d->realtrans) free((ptr_t) d->realtrans);
if (d->fails) free((ptr_t) d->fails);
if (d->newlines) free((ptr_t) d->newlines);
if (d->success) free((ptr_t) d->success);
for (dm = d->musts; dm; dm = ndm)
{
ndm = dm->next;
free(dm->must);
free((ptr_t) dm);
}
}
static char *
icatalloc (char *old, char *new)
{
char *result;
size_t oldsize, newsize;
newsize = (new == NULL) ? 0 : strlen(new);
if (old == NULL)
oldsize = 0;
else if (newsize == 0)
return old;
else oldsize = strlen(old);
if (old == NULL)
result = (char *) malloc(newsize + 1);
else
result = (char *) realloc((void *) old, oldsize + newsize + 1);
if (result != NULL && new != NULL)
(void) strcpy(result + oldsize, new);
return result;
}
static char *
icpyalloc (char *string)
{
return icatalloc((char *) NULL, string);
}
static char *
istrstr (char *lookin, char *lookfor)
{
char *cp;
size_t len;
len = strlen(lookfor);
for (cp = lookin; *cp != '\0'; ++cp)
if (strncmp(cp, lookfor, len) == 0)
return cp;
return NULL;
}
static void
ifree (char *cp)
{
if (cp != NULL)
free(cp);
}
static void
freelist (char **cpp)
{
int i;
if (cpp == NULL)
return;
for (i = 0; cpp[i] != NULL; ++i)
{
free(cpp[i]);
cpp[i] = NULL;
}
}
static char **
enlist (char **cpp, char *new, size_t len)
{
int i, j;
if (cpp == NULL)
return NULL;
if ((new = icpyalloc(new)) == NULL)
{
freelist(cpp);
return NULL;
}
new[len] = '\0';
for (i = 0; cpp[i] != NULL; ++i)
if (istrstr(cpp[i], new) != NULL)
{
free(new);
return cpp;
}
j = 0;
while (cpp[j] != NULL)
if (istrstr(new, cpp[j]) == NULL)
++j;
else
{
free(cpp[j]);
if (--i == j)
break;
cpp[j] = cpp[i];
cpp[i] = NULL;
}
cpp = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp);
if (cpp == NULL)
return NULL;
cpp[i] = new;
cpp[i + 1] = NULL;
return cpp;
}
static char **
comsubs (char *left, char *right)
{
char **cpp;
char *lcp;
char *rcp;
size_t i, len;
if (left == NULL || right == NULL)
return NULL;
cpp = (char **) malloc(sizeof *cpp);
if (cpp == NULL)
return NULL;
cpp[0] = NULL;
for (lcp = left; *lcp != '\0'; ++lcp)
{
len = 0;
rcp = index(right, *lcp);
while (rcp != NULL)
{
for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
continue;
if (i > len)
len = i;
rcp = index(rcp + 1, *lcp);
}
if (len == 0)
continue;
if ((cpp = enlist(cpp, lcp, len)) == NULL)
break;
}
return cpp;
}
static char **
addlists (char **old, char **new)
{
int i;
if (old == NULL || new == NULL)
return NULL;
for (i = 0; new[i] != NULL; ++i)
{
old = enlist(old, new[i], strlen(new[i]));
if (old == NULL)
break;
}
return old;
}
static char **
inboth (char **left, char **right)
{
char **both;
char **temp;
int lnum, rnum;
if (left == NULL || right == NULL)
return NULL;
both = (char **) malloc(sizeof *both);
if (both == NULL)
return NULL;
both[0] = NULL;
for (lnum = 0; left[lnum] != NULL; ++lnum)
{
for (rnum = 0; right[rnum] != NULL; ++rnum)
{
temp = comsubs(left[lnum], right[rnum]);
if (temp == NULL)
{
freelist(both);
return NULL;
}
both = addlists(both, temp);
freelist(temp);
free(temp);
if (both == NULL)
return NULL;
}
}
return both;
}
typedef struct
{
char **in;
char *left;
char *right;
char *is;
} must;
static void
resetmust (must *mp)
{
mp->left[0] = mp->right[0] = mp->is[0] = '\0';
freelist(mp->in);
}
static void
dfamust (struct dfa *dfa)
{
must *musts;
must *mp;
char *result;
int ri;
int i;
int exact;
token t;
static must must0;
struct dfamust *dm;
static char empty_string[] = "";
result = empty_string;
exact = 0;
musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts);
if (musts == NULL)
return;
mp = musts;
for (i = 0; i <= dfa->tindex; ++i)
mp[i] = must0;
for (i = 0; i <= dfa->tindex; ++i)
{
mp[i].in = (char **) malloc(sizeof *mp[i].in);
mp[i].left = malloc(2);
mp[i].right = malloc(2);
mp[i].is = malloc(2);
if (mp[i].in == NULL || mp[i].left == NULL ||
mp[i].right == NULL || mp[i].is == NULL)
goto done;
mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
mp[i].in[0] = NULL;
}
#ifdef DEBUG
fprintf(stderr, "dfamust:\n");
for (i = 0; i < dfa->tindex; ++i)
{
fprintf(stderr, " %d:", i);
prtok(dfa->tokens[i]);
}
putc('\n', stderr);
#endif
for (ri = 0; ri < dfa->tindex; ++ri)
{
switch (t = dfa->tokens[ri])
{
case LPAREN:
case RPAREN:
goto done;
case EMPTY:
case BEGLINE:
case ENDLINE:
case BEGWORD:
case ENDWORD:
case LIMWORD:
case NOTLIMWORD:
case BACKREF:
resetmust(mp);
break;
case STAR:
case QMARK:
if (mp <= musts)
goto done;
--mp;
resetmust(mp);
break;
case OR:
case ORTOP:
if (mp < &musts[2])
goto done;
{
char **new;
must *lmp;
must *rmp;
int j, ln, rn, n;
rmp = --mp;
lmp = --mp;
if (strcmp(lmp->is, rmp->is) != 0)
lmp->is[0] = '\0';
i = 0;
while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
++i;
lmp->left[i] = '\0';
ln = strlen(lmp->right);
rn = strlen(rmp->right);
n = ln;
if (n > rn)
n = rn;
for (i = 0; i < n; ++i)
if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
break;
for (j = 0; j < i; ++j)
lmp->right[j] = lmp->right[(ln - i) + j];
lmp->right[j] = '\0';
new = inboth(lmp->in, rmp->in);
if (new == NULL)
goto done;
freelist(lmp->in);
free((char *) lmp->in);
lmp->in = new;
}
break;
case PLUS:
if (mp <= musts)
goto done;
--mp;
mp->is[0] = '\0';
break;
case END:
if (mp != &musts[1])
goto done;
for (i = 0; musts[0].in[i] != NULL; ++i)
if (strlen(musts[0].in[i]) > strlen(result))
result = musts[0].in[i];
if (strcmp(result, musts[0].is) == 0)
exact = 1;
goto done;
case CAT:
if (mp < &musts[2])
goto done;
{
must *lmp;
must *rmp;
rmp = --mp;
lmp = --mp;
lmp->in = addlists(lmp->in, rmp->in);
if (lmp->in == NULL)
goto done;
if (lmp->right[0] != '\0' &&
rmp->left[0] != '\0')
{
char *tp;
tp = icpyalloc(lmp->right);
if (tp == NULL)
goto done;
tp = icatalloc(tp, rmp->left);
if (tp == NULL)
goto done;
lmp->in = enlist(lmp->in, tp,
strlen(tp));
free(tp);
if (lmp->in == NULL)
goto done;
}
if (lmp->is[0] != '\0')
{
lmp->left = icatalloc(lmp->left,
rmp->left);
if (lmp->left == NULL)
goto done;
}
if (rmp->is[0] == '\0')
lmp->right[0] = '\0';
lmp->right = icatalloc(lmp->right, rmp->right);
if (lmp->right == NULL)
goto done;
if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
{
lmp->is = icatalloc(lmp->is, rmp->is);
if (lmp->is == NULL)
goto done;
}
else
lmp->is[0] = '\0';
}
break;
default:
if (t < END)
{
goto done;
}
else if (t == '\0')
{
goto done;
}
else if (t >= CSET)
{
resetmust(mp);
}
else
{
resetmust(mp);
mp->is[0] = mp->left[0] = mp->right[0] = t;
mp->is[1] = mp->left[1] = mp->right[1] = '\0';
mp->in = enlist(mp->in, mp->is, (size_t)1);
if (mp->in == NULL)
goto done;
}
break;
}
#ifdef DEBUG
fprintf(stderr, " node: %d:", ri);
prtok(dfa->tokens[ri]);
fprintf(stderr, "\n in:");
for (i = 0; mp->in[i]; ++i)
fprintf(stderr, " \"%s\"", mp->in[i]);
fprintf(stderr, "\n is: \"%s\"\n", mp->is);
fprintf(stderr, " left: \"%s\"\n", mp->left);
fprintf(stderr, " right: \"%s\"\n", mp->right);
#endif
++mp;
}
done:
if (strlen(result))
{
dm = (struct dfamust *) malloc(sizeof (struct dfamust));
dm->exact = exact;
dm->must = malloc(strlen(result) + 1);
strcpy(dm->must, result);
dm->next = dfa->musts;
dfa->musts = dm;
}
mp = musts;
for (i = 0; i <= dfa->tindex; ++i)
{
freelist(mp[i].in);
ifree((char *) mp[i].in);
ifree(mp[i].left);
ifree(mp[i].right);
ifree(mp[i].is);
}
free((char *) mp);
}