#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <sys/types.h>
#include "system.h"
#include "grep.h"
#include "regex.h"
#include "dfa.h"
#include "kwset.h"
/* Number of distinct values an unsigned char can hold; used to size the
   per-character translation table in kwsinit.  */
#define NCHAR (UCHAR_MAX + 1)
/* Forward declarations of the compile/execute routines that the matchers
   table refers to, plus kwsinit, which is called before its definition
   is needed by the routines below.  */
static void Gcompile PARAMS((char *, size_t));
static void Ecompile PARAMS((char *, size_t));
static char *EGexecute PARAMS((char *, size_t, char **));
static void Fcompile PARAMS((char *, size_t));
static char *Fexecute PARAMS((char *, size_t, char **));
static void kwsinit PARAMS((void));
/* Table of the available matchers.  Each entry pairs a matcher name with
   the routine that compiles a pattern for it and the routine that executes
   a search with it.  "default" and "grep" share Gcompile, "egrep" and "awk"
   share Ecompile (which checks for the "awk" name itself), and all four use
   EGexecute; "fgrep" uses the keyword-only Fcompile/Fexecute pair.
   The final all-zero entry presumably marks the end of the table for
   whoever scans it (the scanning code is not in this file).  */
struct matcher matchers[] = {
{ "default", Gcompile, EGexecute },
{ "grep", Gcompile, EGexecute },
{ "egrep", Ecompile, EGexecute },
{ "awk", Ecompile, EGexecute },
{ "fgrep", Fcompile, Fexecute },
{ 0, 0, 0 },
};
/* Nonzero if C is a word constituent: alphanumeric according to ISALNUM,
   or an underscore.  C appears more than once in the expansion, so do not
   pass an argument with side effects.  */
#define WCHAR(C) (ISALNUM(C) || (C) == '_')
/* DFA built from the pattern by dfacomp in Gcompile/Ecompile.  */
static struct dfa dfa;
/* Regex pattern buffer filled by re_compile_pattern in Gcompile/Ecompile
   and used by re_search/re_match in EGexecute.  */
static struct re_pattern_buffer regexbuf;
/* Keyword set allocated by kwsinit.  EGexecute tests it for non-null
   before using it as a pre-filter; Fexecute uses it unconditionally.  */
static kwset_t kwset;
/* Count of "exact" must-strings that kwsmusts added to kwset.  They are
   added before the non-exact ones, and EGexecute compares the index of a
   keyword match against this count.  */
static int lastexact;
/* Report the error message MESG by handing it to fatal with a second
   argument of 0.  This function has external linkage; presumably it is the
   error hook the DFA module expects its client to provide (confirm against
   dfa.h).  */
void
dfaerror (char const *mesg)
{
fatal(mesg, 0);
}
/* Allocate the file-scope keyword set `kwset'.

   When match_icase is set, a static translation table mapping every
   character value to TOLOWER of itself is filled in and handed to kwsalloc;
   otherwise kwsalloc receives a null table.  If kwsalloc returns null the
   failure is reported through fatal.  */
static void
kwsinit (void)
{
  static char trans[NCHAR];
  char *table = (char *) 0;

  if (match_icase)
    {
      int c;

      for (c = 0; c < NCHAR; ++c)
        trans[c] = TOLOWER(c);
      table = trans;
    }

  kwset = kwsalloc(table);
  if (!kwset)
    fatal("memory exhausted", 0);
}
static void
kwsmusts (void)
{
struct dfamust *dm;
char *err;
if (dfa.musts)
{
kwsinit();
for (dm = dfa.musts; dm; dm = dm->next)
{
if (!dm->exact)
continue;
++lastexact;
if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
fatal(err, 0);
}
for (dm = dfa.musts; dm; dm = dm->next)
{
if (dm->exact)
continue;
if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
fatal(err, 0);
}
if ((err = kwsprep(kwset)) != 0)
fatal(err, 0);
}
}
static void
Gcompile (char *pattern, size_t size)
{
const char *err;
re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
if ((err = re_compile_pattern(pattern, size, ®exbuf)) != 0)
fatal(err, 0);
if (match_words || match_lines)
{
char *n = malloc(size + 50);
int i = 0;
strcpy(n, "");
if (match_lines)
strcpy(n, "^\\(");
if (match_words)
strcpy(n, "\\(^\\|[^[:alnum:]_]\\)\\(");
i = strlen(n);
memcpy(n + i, pattern, size);
i += size;
if (match_words)
strcpy(n + i, "\\)\\([^[:alnum:]_]\\|$\\)");
if (match_lines)
strcpy(n + i, "\\)$");
i += strlen(n + i);
dfacomp(n, i, &dfa, 1);
}
else
dfacomp(pattern, size, &dfa, 1);
kwsmusts();
}
static void
Ecompile (char *pattern, size_t size)
{
const char *err;
if (strcmp(matcher, "awk") == 0)
{
re_set_syntax(RE_SYNTAX_AWK);
dfasyntax(RE_SYNTAX_AWK, match_icase, eolbyte);
}
else
{
re_set_syntax (RE_SYNTAX_POSIX_EGREP);
dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
}
if ((err = re_compile_pattern(pattern, size, ®exbuf)) != 0)
fatal(err, 0);
if (match_words || match_lines)
{
char *n = malloc(size + 50);
int i = 0;
strcpy(n, "");
if (match_lines)
strcpy(n, "^(");
if (match_words)
strcpy(n, "(^|[^[:alnum:]_])(");
i = strlen(n);
memcpy(n + i, pattern, size);
i += size;
if (match_words)
strcpy(n + i, ")([^[:alnum:]_]|$)");
if (match_lines)
strcpy(n + i, ")$");
i += strlen(n + i);
dfacomp(n, i, &dfa, 1);
}
else
dfacomp(pattern, size, &dfa, 1);
kwsmusts();
}
/* Search BUF (SIZE bytes) for the first line that matches the pattern
   compiled by Gcompile or Ecompile.

   Returns a pointer to the start of the matching line and stores in *ENDP
   the position just after it (one past its terminating eolbyte, or the end
   of the buffer if the line is unterminated).  Returns 0, leaving *ENDP
   untouched, when no line matches.

   Candidate lines are found with progressively more expensive matchers:
     1. the keyword set, if kwsmusts built one;
     2. the DFA;
     3. the regex matcher, used only when the DFA reports a backreference
        (or, through the same path, to verify -w / -x constraints).

   NOTE(review): the byte at the end of the searched region is saved and
   restored around dfaexec, which suggests dfaexec may temporarily write
   there; in the no-kwset branch that byte is *BUFLIM, one past the data,
   so the caller presumably guarantees it is addressable -- confirm against
   dfa.c and the caller.  */
static char *
EGexecute (char *buf, size_t size, char **endp)
{
  register char *buflim, *beg, *end, save;
  char eol = eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
  static struct re_registers regs;

  buflim = buf + size;

  for (beg = end = buf; end < buflim; beg = end + 1)
    {
      if (kwset)
        {
          /* Find a candidate position with the keyword matcher.  */
          beg = kwsexec(kwset, beg, buflim - beg, &kwsm);
          if (!beg)
            goto failure;
          /* Widen the candidate to the whole line containing it.  */
          end = memchr(beg, eol, buflim - beg);
          if (!end)
            end = buflim;
          while (beg > buf && beg[-1] != eol)
            --beg;
          save = *end;
          /* Keywords with an index below lastexact are the ones kwsmusts
             added from `exact' musts; such a hit is accepted without
             running the DFA.  */
          if (kwsm.index < lastexact)
            goto success;
          if (!dfaexec(&dfa, beg, end, 0, (int *) 0, &backref))
            {
              *end = save;
              continue;
            }
          *end = save;
          /* DFA matched and saw no backreference: done.  */
          if (!backref)
            goto success;
        }
      else
        {
          /* No keyword set: let the DFA scan the rest of the buffer.  */
          save = *buflim;
          beg = dfaexec(&dfa, beg, buflim, 0, (int *) 0, &backref);
          *buflim = save;
          if (!beg)
            goto failure;
          /* Widen the hit to the whole line containing it.  */
          end = memchr(beg, eol, buflim - beg);
          if (!end)
            end = buflim;
          while (beg > buf && beg[-1] != eol)
            --beg;
          if (!backref)
            goto success;
        }

      /* The DFA could not decide on its own; run the regex matcher on
         just this line.  */
      regexbuf.not_eol = 0;
      if ((start = re_search(&regexbuf, beg, end - beg, 0, end - beg, &regs)) >= 0)
        {
          len = regs.end[0] - start;
          if ((!match_lines && !match_words)
              || (match_lines && len == end - beg))
            goto success;
          /* For whole-word matching, look for a match that is delimited
             by non-word characters (or the line boundaries) on both
             sides.  */
          if (match_words)
            while (start >= 0)
              {
                if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
                    && (len == end - beg
                        || !WCHAR ((unsigned char) beg[start + len])))
                  goto success;
                if (len > 0)
                  {
                    /* Try a shorter match at the same start by truncating
                       the subject; not_eol keeps `$' from matching at the
                       artificial end.  */
                    --len;
                    regexbuf.not_eol = 1;
                    len = re_match(&regexbuf, beg, start + len, start, &regs);
                  }
                if (len <= 0)
                  {
                    /* No shorter match here; search again starting one
                       position further along the line.  */
                    if (start == end - beg)
                      break;
                    ++start;
                    regexbuf.not_eol = 0;
                    start = re_search(&regexbuf, beg, end - beg,
                                      start, end - beg - start, &regs);
                    len = regs.end[0] - start;
                  }
              }
        }
    }

 failure:
  return 0;

 success:
  *endp = end < buflim ? end + 1 : end;
  return beg;
}
/* Compile PATTERN (SIZE bytes) as a list of fixed strings.

   The pattern is split at '\n' characters and each piece is added to the
   keyword set with kwsincr; the newline itself is not part of a keyword.
   At least one keyword is always added, so an empty pattern yields a
   single empty keyword.  The set is then prepared with kwsprep.  Any error
   string returned by kwsincr or kwsprep is passed to fatal.  */
static void
Fcompile (char *pattern, size_t size)
{
  char *const stop = pattern + size;
  char *line = pattern;
  char *msg;

  kwsinit();

  do
    {
      /* Find the end of the current piece: the next newline, or the end
         of the pattern when there is none.  */
      char *eol = memchr(line, '\n', stop - line);

      if (!eol)
        eol = stop;

      msg = kwsincr(kwset, line, eol - line);
      if (msg != 0)
        fatal(msg, 0);

      /* Step over the newline, if any, to the start of the next piece.  */
      line = eol < stop ? eol + 1 : eol;
    }
  while (line < stop);

  msg = kwsprep(kwset);
  if (msg != 0)
    fatal(msg, 0);
}
/* Search BUF (SIZE bytes) for the first line containing one of the fixed
   strings compiled by Fcompile.

   Returns a pointer to the start of the line containing the match and
   stores in *ENDP the position just after that line (one past its
   terminating eolbyte, or the end of the buffer if unterminated).
   Returns 0, leaving *ENDP untouched, when nothing matches.

   With match_lines set, a keyword hit only counts when it is bounded on
   both sides by eolbyte or the buffer edges.  With match_words set, a hit
   only counts when the characters on both sides are not word constituents
   (per WCHAR); if the character after the hit is a word constituent, a
   shorter keyword match within the same span is tried.  */
static char *
Fexecute (char *buf, size_t size, char **endp)
{
  register char *beg, *try, *end;
  register size_t len;
  char eol = eolbyte;
  struct kwsmatch kwsmatch;

  for (beg = buf; beg <= buf + size; ++beg)
    {
      if (!(beg = kwsexec(kwset, beg, buf + size - beg, &kwsmatch)))
        return 0;
      len = kwsmatch.size[0];
      if (match_lines)
        {
          if (beg > buf && beg[-1] != eol)
            continue;
          if (beg + len < buf + size && beg[len] != eol)
            continue;
          goto success;
        }
      else if (match_words)
        for (try = beg; len && try;)
          {
            if (try > buf && WCHAR((unsigned char) try[-1]))
              break;
            if (try + len < buf + size && WCHAR((unsigned char) try[len]))
              {
                /* Followed by a word character: retry within a span one
                   byte shorter to look for a shorter keyword.  */
                try = kwsexec(kwset, beg, --len, &kwsmatch);
                len = kwsmatch.size[0];
              }
            else
              goto success;
          }
      else
        goto success;
    }

  return 0;

 success:
  if ((end = memchr(beg + len, eol, (buf + size) - (beg + len))) != 0)
    ++end;
  else
    end = buf + size;
  *endp = end;
  /* Back up to the start of the line.  Lines are delimited by eolbyte,
     which need not be '\n', so compare against `eol' exactly as the end
     of the line was located above.  */
  while (beg > buf && beg[-1] != eol)
    --beg;
  return beg;
}