#include "sys_defs.h"
#ifdef HAS_PCRE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include "mymalloc.h"
#include "msg.h"
#include "safe.h"
#include "vstream.h"
#include "vstring.h"
#include "stringops.h"
#include "readlline.h"
#include "dict.h"
#include "dict_pcre.h"
#include "mac_parse.h"
#include "pcre.h"
/*
 * Rule operation codes, stored in the "op" member of DICT_PCRE_RULE and
 * dispatched on by the lookup and close routines.
 */
#define DICT_PCRE_OP_MATCH 1
#define DICT_PCRE_OP_IF 2
#define DICT_PCRE_OP_ENDIF 3
/*
 * Number of capture slots. The offsets vector that is handed to
 * pcre_exec() is sized as three ints per slot.
 */
#define PCRE_MAX_CAPTURE 99
/*
 * Result of parsing the "/pattern/flags" part of a rule, before the
 * pattern is compiled. Filled in by dict_pcre_get_pattern().
 */
typedef struct {
char *regexp;			/* pattern text; points into the caller's line buffer */
int options;			/* PCRE_* option bits for pcre_compile() */
int match;			/* 1 = positive match, 0 = negated with '!' */
} DICT_PCRE_REGEXP;
/*
 * Compiled form of a pattern, filled in by dict_pcre_compile().
 */
typedef struct {
pcre *pattern;			/* result of pcre_compile() */
pcre_extra *hints;		/* result of pcre_study() */
} DICT_PCRE_ENGINE;
/*
 * Generic rule header. It is the first member of the operation-specific
 * rule structures, and the code casts between a pointer to this header and
 * a pointer to the specific structure.
 */
typedef struct DICT_PCRE_RULE {
int op;				/* DICT_PCRE_OP_MATCH/IF/ENDIF */
int nesting;			/* IF nesting level in effect when the rule was read */
int lineno;			/* map file line number, used in diagnostics */
struct DICT_PCRE_RULE *next;	/* next rule in the list, or null */
} DICT_PCRE_RULE;
/*
 * Pattern rule with replacement text (DICT_PCRE_OP_MATCH).
 */
typedef struct {
DICT_PCRE_RULE rule;		/* generic header, must be first */
pcre *pattern;			/* compiled pattern */
pcre_extra *hints;		/* pcre_study() result, may be null */
char *replacement;		/* private copy of the replacement text */
int match;			/* 1 = fire on match, 0 = fire on non-match */
} DICT_PCRE_MATCH_RULE;
/*
 * Conditional rule that opens an IF..ENDIF block (DICT_PCRE_OP_IF).
 */
typedef struct {
DICT_PCRE_RULE rule;		/* generic header, must be first */
pcre *pattern;			/* compiled pattern */
pcre_extra *hints;		/* pcre_study() result, may be null */
int match;			/* 1 = enter block on match, 0 = on non-match */
} DICT_PCRE_IF_RULE;
/*
 * PCRE map instance: the generic dictionary part followed by the rule list.
 */
typedef struct {
DICT dict;			/* generic members; the code casts DICT * to DICT_PCRE * */
DICT_PCRE_RULE *head;		/* first rule, in map file order */
} DICT_PCRE;
/* Set to 1 by dict_pcre_open() once pcre_malloc/pcre_free have been redirected. */
static int dict_pcre_init = 0;
/*
 * State that dict_pcre_lookup() passes through mac_parse() to the
 * dict_pcre_expand() call-back.
 */
typedef struct {
const char *mapname;		/* map name, for diagnostics */
int lineno;			/* rule line number, for diagnostics */
VSTRING *expansion_buf;		/* receives the expanded replacement text */
const char *lookup_string;	/* the string that was matched */
int offsets[PCRE_MAX_CAPTURE * 3];	/* offsets vector filled in by pcre_exec() */
int matches;			/* pcre_exec() result value */
} DICT_PCRE_EXPAND_CONTEXT;
/*
 * State that dict_pcre_parse_rule() passes through mac_parse() to the
 * dict_pcre_prescan() call-back.
 */
typedef struct {
const char *mapname;		/* map name, for diagnostics */
int lineno;			/* rule line number, for diagnostics */
int flags;			/* dictionary flags; tested for DICT_FLAG_NO_REGSUB */
size_t max_sub;			/* largest $number index seen in the replacement */
} DICT_PCRE_PRESCAN_CONTEXT;
/*
 * Fallback in case the included headers do not define MAC_PARSE_OK.
 */
#ifndef MAC_PARSE_OK
#define MAC_PARSE_OK 0
#endif
/*
 * Readable names for "nothing special" pcre_exec() arguments. Only
 * NULL_STARTOFFSET and NULL_EXEC_OPTIONS are used in the code visible here.
 */
#define NULL_STARTOFFSET (0)
#define NULL_EXEC_OPTIONS (0)
#define NULL_OVECTOR ((int *) 0)
#define NULL_OVECTOR_LENGTH (0)
/*
 * dict_pcre_expand - mac_parse() call-back used at lookup time
 *
 * type: MAC_PARSE_VARNAME for a $number reference, anything else for
 * literal text. buf: the reference name or the literal text. ptr: a
 * DICT_PCRE_EXPAND_CONTEXT, passed as a generic pointer.
 *
 * Appends the expansion to ctxt->expansion_buf. Returns MAC_PARSE_OK, or
 * MAC_PARSE_UNDEF when the referenced substring is empty. Terminates via
 * msg_fatal() when the substring cannot be extracted.
 */
static int dict_pcre_expand(int type, VSTRING *buf, char *ptr)
{
    DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
    const char *pp;
    int     n;
    int     ret;

    /*
     * Replace a $number reference with text cut from the matched string.
     */
    if (type == MAC_PARSE_VARNAME) {
        n = atoi(vstring_str(buf));
        ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
                                 ctxt->matches, n, &pp);
        if (ret < 0) {
            /* Say "pcre map", like every other diagnostic in this file. */
            if (ret == PCRE_ERROR_NOSUBSTRING)
                msg_fatal("pcre map %s, line %d: replace index out of range",
                          ctxt->mapname, ctxt->lineno);
            else
                msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
                          ctxt->mapname, ctxt->lineno, ret);
        }

        /*
         * The substring was allocated through pcre_malloc, which
         * dict_pcre_open() points at mymalloc(); hence myfree() here.
         */
        if (*pp == 0) {
            myfree((char *) pp);
            return (MAC_PARSE_UNDEF);
        }
        vstring_strcat(ctxt->expansion_buf, pp);
        myfree((char *) pp);
        return (MAC_PARSE_OK);
    }

    /*
     * Literal text: copy without substitution.
     */
    else {
        vstring_strcat(ctxt->expansion_buf, vstring_str(buf));
        return (MAC_PARSE_OK);
    }
}
/*
 * dict_pcre_exec_error - report a pcre_exec() result that is neither a
 * match nor PCRE_ERROR_NOMATCH
 *
 * A zero result is logged as a warning and the function returns. Every
 * other value is reported with msg_fatal(), which the original switch
 * (written without break statements) relies upon not to return.
 */
static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
{
    if (errval == 0) {
        msg_warn("pcre map %s, line %d: too many (...)",
                 mapname, lineno);
        return;
    } else if (errval == PCRE_ERROR_NULL
               || errval == PCRE_ERROR_BADOPTION) {
        msg_fatal("pcre map %s, line %d: bad args to re_exec",
                  mapname, lineno);
    } else if (errval == PCRE_ERROR_BADMAGIC
               || errval == PCRE_ERROR_UNKNOWN_NODE) {
        msg_fatal("pcre map %s, line %d: corrupt compiled regexp",
                  mapname, lineno);
    } else {
        msg_fatal("pcre map %s, line %d: unknown re_exec error: %d",
                  mapname, lineno, errval);
    }
}
/*
 * dict_pcre_lookup - match a string against the rule list
 *
 * Walks the rules in order. Rules recorded at a deeper IF nesting level
 * than the current one are skipped, which is how the body of a failed IF
 * is bypassed. Returns the replacement text of the first MATCH rule that
 * fires, or a null pointer when no rule fires.
 *
 * For a positive rule the replacement is expanded into a static buffer
 * that is reused by the next call. For a negated rule the stored
 * replacement text is returned as is.
 */
static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
{
    static VSTRING *expansion_buf;
    DICT_PCRE *map = (DICT_PCRE *) dict;
    DICT_PCRE_EXPAND_CONTEXT ctxt;
    DICT_PCRE_RULE *cur;
    int     subject_len = strlen(lookup_string);
    int     depth = 0;
    int     rc;

    dict_errno = 0;

    if (msg_verbose)
        msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);

    for (cur = map->head; cur != 0; cur = cur->next) {

        /* Inside a block whose IF did not fire. */
        if (cur->nesting > depth)
            continue;

        if (cur->op == DICT_PCRE_OP_MATCH) {
            DICT_PCRE_MATCH_RULE *mr = (DICT_PCRE_MATCH_RULE *) cur;

            rc = pcre_exec(mr->pattern, mr->hints,
                           lookup_string, subject_len,
                           NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
                           ctxt.offsets, PCRE_MAX_CAPTURE * 3);
            ctxt.matches = rc;

            /* Neither a match nor a clean non-match: report and move on. */
            if (rc <= 0 && rc != PCRE_ERROR_NOMATCH) {
                dict_pcre_exec_error(dict->name, cur->lineno, rc);
                continue;
            }
            /* The outcome must agree with the rule's polarity. */
            if ((rc > 0) != (mr->match != 0))
                continue;

            /* Negated rule: hand back the stored text unexpanded. */
            if (mr->match == 0)
                return mr->replacement;

            /* Positive rule: expand the replacement text. */
            if (expansion_buf == 0)
                expansion_buf = vstring_alloc(10);
            VSTRING_RESET(expansion_buf);
            ctxt.mapname = dict->name;
            ctxt.lineno = cur->lineno;
            ctxt.lookup_string = lookup_string;
            ctxt.expansion_buf = expansion_buf;

            if (mac_parse(mr->replacement, dict_pcre_expand,
                          (char *) &ctxt) & MAC_PARSE_ERROR)
                msg_fatal("pcre map %s, line %d: bad replacement syntax",
                          dict->name, cur->lineno);

            VSTRING_TERMINATE(expansion_buf);
            return (vstring_str(expansion_buf));
        } else if (cur->op == DICT_PCRE_OP_IF) {
            DICT_PCRE_IF_RULE *cond = (DICT_PCRE_IF_RULE *) cur;

            rc = pcre_exec(cond->pattern, cond->hints,
                           lookup_string, subject_len,
                           NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
                           ctxt.offsets, PCRE_MAX_CAPTURE * 3);
            ctxt.matches = rc;

            if (rc <= 0 && rc != PCRE_ERROR_NOMATCH) {
                dict_pcre_exec_error(dict->name, cur->lineno, rc);
                continue;
            }
            /* Enter the block only when the outcome agrees with the polarity. */
            if ((rc > 0) == (cond->match != 0))
                depth += 1;
            continue;
        } else if (cur->op == DICT_PCRE_OP_ENDIF) {
            depth -= 1;
            continue;
        } else {
            msg_panic("dict_pcre_lookup: impossible operation %d", cur->op);
        }
    }
    return (0);
}
/*
 * dict_pcre_close - release all rules and the dictionary itself
 *
 * Each rule's compiled pattern, study data and replacement text are
 * released with myfree() when present, then the rule, and finally the
 * dictionary through dict_free().
 */
static void dict_pcre_close(DICT *dict)
{
    DICT_PCRE *map = (DICT_PCRE *) dict;
    DICT_PCRE_RULE *cur = map->head;
    DICT_PCRE_RULE *following;
    DICT_PCRE_MATCH_RULE *mr;
    DICT_PCRE_IF_RULE *cond;

    while (cur != 0) {
        /* Save the link before the rule is released. */
        following = cur->next;

        if (cur->op == DICT_PCRE_OP_MATCH) {
            mr = (DICT_PCRE_MATCH_RULE *) cur;
            if (mr->pattern != 0)
                myfree((char *) mr->pattern);
            if (mr->hints != 0)
                myfree((char *) mr->hints);
            if (mr->replacement != 0)
                myfree((char *) mr->replacement);
        } else if (cur->op == DICT_PCRE_OP_IF) {
            cond = (DICT_PCRE_IF_RULE *) cur;
            if (cond->pattern != 0)
                myfree((char *) cond->pattern);
            if (cond->hints != 0)
                myfree((char *) cond->hints);
        } else if (cur->op != DICT_PCRE_OP_ENDIF) {
            msg_panic("dict_pcre_close: unknown operation %d", cur->op);
        }
        myfree((char *) cur);
        cur = following;
    }
    dict_free(dict);
}
/*
 * dict_pcre_get_pattern - split "[!]/pattern/flags" off the front of a rule
 *
 * mapname, lineno: used in diagnostics only. bufp: on entry points at the
 * rule text; on success it is advanced to just past the flags. pattern:
 * receives the match polarity, a pointer to the pattern text (terminated
 * in place inside the caller's buffer) and the PCRE option bits.
 *
 * Returns 1 on success. Returns 0 after logging a warning when there is no
 * pattern, no closing delimiter, or an unknown flag letter.
 */
static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
                                 DICT_PCRE_REGEXP *pattern)
{
    char   *cp = *bufp;
    char    delim;
    int     negated = 0;
    int     toggle;

    /* Every leading '!' flips the polarity. */
    for (; *cp == '!'; cp++)
        negated = !negated;
    pattern->match = !negated;

    for (; *cp && ISSPACE(*cp); cp++)
        continue;
    if (*cp == 0) {
        msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
                 mapname, lineno);
        return (0);
    }

    /* The first character after the optional '!' is the delimiter. */
    delim = *cp++;
    pattern->regexp = cp;

    /*
     * Find the closing delimiter. A backslash protects the character that
     * follows it; the backslash test comes first, as in the original loop.
     */
    while (*cp != 0) {
        if (*cp != '\\') {
            if (*cp == delim)
                break;
            cp += 1;
        } else if (cp[1] == 0) {
            cp += 1;
            break;
        } else {
            cp += 2;
        }
    }
    if (*cp == 0) {
        msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
                 "ignoring this rule", mapname, lineno, delim);
        return (0);
    }
    *cp++ = 0;                          /* terminate the pattern text */

    /* Each flag letter toggles one option, starting from these defaults. */
    pattern->options = PCRE_CASELESS | PCRE_DOTALL;
    for (; *cp && !ISSPACE(*cp); cp++) {
        switch (*cp) {
        case 'i':
            toggle = PCRE_CASELESS;
            break;
        case 'm':
            toggle = PCRE_MULTILINE;
            break;
        case 's':
            toggle = PCRE_DOTALL;
            break;
        case 'x':
            toggle = PCRE_EXTENDED;
            break;
        case 'A':
            toggle = PCRE_ANCHORED;
            break;
        case 'E':
            toggle = PCRE_DOLLAR_ENDONLY;
            break;
        case 'U':
            toggle = PCRE_UNGREEDY;
            break;
        case 'X':
            toggle = PCRE_EXTRA;
            break;
        default:
            msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
                     "skipping this rule", mapname, lineno, *cp);
            return (0);
        }
        pattern->options ^= toggle;
    }
    *bufp = cp;
    return (1);
}
/*
 * dict_pcre_prescan - mac_parse() call-back used while a map is loaded
 *
 * type: MAC_PARSE_VARNAME for a $name reference; other types are accepted
 * without inspection. buf: the reference name. context: a
 * DICT_PCRE_PRESCAN_CONTEXT, passed as a generic pointer.
 *
 * Validates each reference and records the largest index in ctxt->max_sub.
 * Returns MAC_PARSE_OK, or MAC_PARSE_ERROR after logging a warning when
 * substitution is disabled (DICT_FLAG_NO_REGSUB), when the name is not
 * numeric, or when the index is outside 1..PCRE_MAX_CAPTURE.
 */
static int dict_pcre_prescan(int type, VSTRING *buf, char *context)
{
    DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
    const char *cp;
    size_t  n;

    if (type != MAC_PARSE_VARNAME)
        return (MAC_PARSE_OK);

    if (ctxt->flags & DICT_FLAG_NO_REGSUB) {
        msg_warn("pcre map %s, line %d: "
                 "regular expression substitution is not allowed",
                 ctxt->mapname, ctxt->lineno);
        return (MAC_PARSE_ERROR);
    }
    if (!alldig(vstring_str(buf))) {
        msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
                 ctxt->mapname, ctxt->lineno, vstring_str(buf));
        return (MAC_PARSE_ERROR);
    }

    /*
     * Convert by hand and stop once the value is past the limit. atoi() has
     * undefined behavior when a long digit string overflows an int. The
     * lookup code passes PCRE_MAX_CAPTURE * 3 ints to pcre_exec(), so a
     * larger index can never be satisfied and would only surface as a
     * fatal "replace index out of range" error at lookup time.
     */
    n = 0;
    for (cp = vstring_str(buf); *cp != 0; cp++) {
        n = n * 10 + (size_t) (*cp - '0');
        if (n > PCRE_MAX_CAPTURE)
            break;
    }
    if (n < 1 || n > PCRE_MAX_CAPTURE) {
        msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
                 ctxt->mapname, ctxt->lineno, vstring_str(buf));
        return (MAC_PARSE_ERROR);
    }
    if (n > ctxt->max_sub)
        ctxt->max_sub = n;
    return (MAC_PARSE_OK);
}
/*
 * dict_pcre_compile - compile and study one pattern
 *
 * mapname, lineno: used in diagnostics only. pattern: pattern text and
 * option bits. engine: receives the compiled pattern and the study data.
 *
 * Returns 1 on success. Returns 0 after logging a warning when compilation
 * or study fails; after a study failure the compiled pattern has already
 * been released.
 */
static int dict_pcre_compile(const char *mapname, int lineno,
                             DICT_PCRE_REGEXP *pattern,
                             DICT_PCRE_ENGINE *engine)
{
    const char *errmsg;
    int     erroffset;

    engine->pattern = pcre_compile(pattern->regexp, pattern->options,
                                   &errmsg, &erroffset, NULL);
    if (engine->pattern != 0) {
        engine->hints = pcre_study(engine->pattern, 0, &errmsg);
        if (errmsg == 0)
            return (1);

        /* Study failed: drop the compiled pattern again. */
        msg_warn("pcre map %s, line %d: error while studying regex: %s",
                 mapname, lineno, errmsg);
        myfree((char *) engine->pattern);
    } else {
        msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
                 mapname, lineno, erroffset, errmsg);
    }
    return (0);
}
/*
 * dict_pcre_rule_alloc - allocate a rule and fill in the generic header
 *
 * size is the size of the complete, operation-specific structure. Only
 * the DICT_PCRE_RULE header members are initialized here; the caller sets
 * the remaining members.
 */
static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting,
                                            int lineno,
                                            size_t size)
{
    DICT_PCRE_RULE *fresh = (DICT_PCRE_RULE *) mymalloc(size);

    fresh->next = 0;
    fresh->lineno = lineno;
    fresh->nesting = nesting;
    fresh->op = op;
    return (fresh);
}
/*
 * dict_pcre_parse_rule - turn one logical map line into a rule
 *
 * mapname, lineno: used in diagnostics only. line: the rule text, modified
 * in place while the pattern is split off. nesting: the current IF nesting
 * level, recorded in the rule. dict_flags: dictionary flags, passed on to
 * the replacement pre-scan.
 *
 * Three forms are recognized: "[!]/pattern/flags replacement", "IF
 * [!]/pattern/flags" and "ENDIF". Returns a newly allocated rule, or a null
 * pointer after logging a warning when the line has to be skipped.
 */
static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno,
                                            char *line, int nesting,
                                            int dict_flags)
{
    char   *p;

    p = line;

    /*
     * Pattern rule: anything that does not start with a letter or digit.
     */
    if (!ISALNUM(*p)) {
        DICT_PCRE_REGEXP regexp;
        DICT_PCRE_ENGINE engine;
        DICT_PCRE_PRESCAN_CONTEXT prescan_context;
        DICT_PCRE_MATCH_RULE *match_rule;

        if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
            return (0);

        /*
         * The replacement text is whatever follows the pattern and flags.
         */
        while (*p && ISSPACE(*p))
            ++p;
        if (!*p)
            msg_warn("pcre map %s, line %d: no replacement text: "
                     "using empty string", mapname, lineno);

        /*
         * Check the replacement text now, so that bad $number references
         * are found when the map is loaded instead of at lookup time.
         */
        prescan_context.mapname = mapname;
        prescan_context.lineno = lineno;
        prescan_context.flags = dict_flags;
        prescan_context.max_sub = 0;

        if (mac_parse(p, dict_pcre_prescan, (char *) &prescan_context)
            & MAC_PARSE_ERROR) {
            msg_warn("pcre map %s, line %d: bad replacement syntax: "
                     "skipping this rule", mapname, lineno);
            return (0);
        }

        /*
         * A negated rule fires when the pattern does not match, so there
         * is no matched text to substitute.
         */
        if (prescan_context.max_sub > 0 && regexp.match == 0) {
            msg_warn("pcre map %s, line %d: $number found in negative match "
                   "replacement text: skipping this rule", mapname, lineno);
            return (0);
        }
        if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
            return (0);

        /*
         * Reject $number references beyond the number of (...) groups in
         * the pattern. Without this check the mistake is only noticed when
         * the rule fires, where it is a fatal error.
         */
#ifdef PCRE_INFO_CAPTURECOUNT
        if (prescan_context.max_sub > 0) {
            int     actual_sub;

            if (pcre_fullinfo(engine.pattern, engine.hints,
                              PCRE_INFO_CAPTURECOUNT,
                              (void *) &actual_sub) != 0)
                msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
                          mapname, lineno);
            if (actual_sub < 0
                || prescan_context.max_sub > (size_t) actual_sub) {
                msg_warn("pcre map %s, line %d: out of range replacement "
                         "index \"%lu\": skipping this rule", mapname,
                         lineno, (unsigned long) prescan_context.max_sub);
                if (engine.pattern)
                    myfree((char *) engine.pattern);
                if (engine.hints)
                    myfree((char *) engine.hints);
                return (0);
            }
        }
#endif

        /*
         * Save the result. The replacement is copied because "line" is
         * owned by the caller.
         */
        match_rule = (DICT_PCRE_MATCH_RULE *)
            dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno,
                                 sizeof(DICT_PCRE_MATCH_RULE));
        match_rule->match = regexp.match;
        match_rule->replacement = mystrdup(p);
        match_rule->pattern = engine.pattern;
        match_rule->hints = engine.hints;
        return ((DICT_PCRE_RULE *) match_rule);
    }

    /*
     * IF [!]/pattern/flags
     */
    else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
        DICT_PCRE_REGEXP regexp;
        DICT_PCRE_ENGINE engine;
        DICT_PCRE_IF_RULE *if_rule;

        p += 2;
        while (*p && ISSPACE(*p))
            p++;
        if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
            return (0);
        while (*p && ISSPACE(*p))
            ++p;
        if (*p)
            msg_warn("pcre map %s, line %d: ignoring extra text after IF",
                     mapname, lineno);
        if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
            return (0);

        if_rule = (DICT_PCRE_IF_RULE *)
            dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno,
                                 sizeof(DICT_PCRE_IF_RULE));
        if_rule->match = regexp.match;
        if_rule->pattern = engine.pattern;
        if_rule->hints = engine.hints;
        return ((DICT_PCRE_RULE *) if_rule);
    }

    /*
     * ENDIF
     */
    else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
        DICT_PCRE_RULE *rule;

        p += 5;
        if (nesting == 0) {
            msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
                     mapname, lineno);
            return (0);
        }
        while (*p && ISSPACE(*p))
            ++p;
        if (*p)
            msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
                     mapname, lineno);
        rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno,
                                    sizeof(DICT_PCRE_RULE));
        return (rule);
    }

    /*
     * Anything else is not a request that this map type knows about.
     */
    else {
        msg_warn("pcre map %s, line %d: ignoring unrecognized request",
                 mapname, lineno);
        return (0);
    }
}
/*
 * dict_pcre_open - load a PCRE map from file
 *
 * mapname: path of the map file. unused_flags: not used by this function.
 * dict_flags: dictionary flags; DICT_FLAG_PATTERN is added to them.
 *
 * Reads the file one logical line at a time, parses each non-empty line
 * into a rule and appends it to the rule list. Lines that fail to parse
 * are skipped. Terminates via msg_fatal() when the file cannot be opened.
 * Returns the dictionary handle, passed through DICT_DEBUG().
 */
DICT *dict_pcre_open(const char *mapname, int unused_flags, int dict_flags)
{
    VSTRING *buf = vstring_alloc(100);
    DICT_PCRE *map;
    DICT_PCRE_RULE **link;
    DICT_PCRE_RULE *parsed;
    VSTREAM *fp;
    char   *text;
    int     lineno = 0;
    int     depth = 0;

    map = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname, sizeof(*map));
    map->dict.lookup = dict_pcre_lookup;
    map->dict.close = dict_pcre_close;
    map->dict.flags = dict_flags | DICT_FLAG_PATTERN;
    map->head = 0;
    link = &map->head;                  /* where the next rule is hooked in */

    /* Route PCRE's allocations through mymalloc/myfree, once per process. */
    if (!dict_pcre_init) {
        pcre_malloc = (void *(*) (size_t)) mymalloc;
        pcre_free = (void (*) (void *)) myfree;
        dict_pcre_init = 1;
    }

    fp = vstream_fopen(mapname, O_RDONLY, 0);
    if (fp == 0)
        msg_fatal("open %s: %m", mapname);

    while (readlline(buf, fp, &lineno)) {
        text = vstring_str(buf);
        trimblanks(text, 0)[0] = 0;     /* strip trailing blanks */
        if (*text == 0)
            continue;

        parsed = dict_pcre_parse_rule(mapname, lineno, text, depth,
                                      dict_flags);
        if (parsed == 0)
            continue;

        /* Track the IF nesting level that later rules are recorded at. */
        switch (parsed->op) {
        case DICT_PCRE_OP_IF:
            depth += 1;
            break;
        case DICT_PCRE_OP_ENDIF:
            depth -= 1;
            break;
        default:
            break;
        }

        /* Append; the rule allocator already cleared the "next" member. */
        *link = parsed;
        link = &parsed->next;
    }

    if (depth != 0)
        msg_warn("pcre map %s, line %d: more IFs than ENDIFs",
                 mapname, lineno);

    vstring_free(buf);
    vstream_fclose(fp);

    return (DICT_DEBUG (&map->dict));
}
#endif