#include <sys_defs.h>
#include <ctype.h>
#include <string.h>
#include <vstring.h>
#include <msg.h>
#include <stringops.h>
#include "lex_822.h"
#include "quote_822_local.h"
#include "tok822.h"
/*
 * COLLECT - copy characters from string pointer s into the buffer of
 * token t, using c as the scratch "current character" variable.
 *
 * Copying stops at the end of the string or at the first unescaped
 * character for which cond is false; s is left pointing at that character.
 * A backslash is not stored: it causes the character that follows it to be
 * stored without evaluating cond. A backslash at the very end of the string
 * is dropped. Every stored character for which IS_SPACE_TAB_CR_LF() is true
 * is stored as a single space. The token buffer is terminated afterwards.
 *
 * Note: the arguments are expanded more than once and s and c are modified.
 */
#define COLLECT(t,s,c,cond) { \
while ((c = *(unsigned char *) s) != 0) { \
if (c == '\\') { \
if ((c = *(unsigned char *)++s) == 0) \
break; \
} else if (!(cond)) { \
break; \
} \
VSTRING_ADDCH(t->vstr, IS_SPACE_TAB_CR_LF(c) ? ' ' : c); \
s++; \
} \
VSTRING_TERMINATE(t->vstr); \
}
/*
 * COLLECT_SKIP_LAST - same as COLLECT, then step s over the character that
 * stopped the copy (for example a closing delimiter) unless s is already at
 * the end of the string.
 */
#define COLLECT_SKIP_LAST(t,s,c,cond) { COLLECT(t,s,c,cond); if (*s) s++; }
/*
 * SKIP - move tp backwards along the prev links for as long as cond holds.
 * The walk also stops at a token whose type is zero.
 */
#define SKIP(tp, cond) { \
while (tp->type && (cond)) \
tp = tp->prev; \
}
/*
 * MOVE_COMMENT_AND_CONTINUE - take token tp out of the list with
 * tok822_unlink(), put it in front of right with tok822_prepend() (right is
 * updated with that call's result), then resume with the token that
 * tok822_unlink() returned.
 *
 * The expansion ends in "continue", so it must appear directly inside the
 * loop that is to be continued. For the same reason it must not be wrapped
 * in do { } while (0).
 */
#define MOVE_COMMENT_AND_CONTINUE(tp, right) { \
TOK822 *prev = tok822_unlink(tp); \
right = tok822_prepend(right, tp); \
tp = prev; \
continue; \
}
/*
 * SKIP_MOVE_COMMENT - like SKIP, but each TOK822_COMMENT token met on the
 * way is relocated with MOVE_COMMENT_AND_CONTINUE. The "continue" in that
 * macro applies to the while loop of this macro.
 */
#define SKIP_MOVE_COMMENT(tp, cond, right) { \
while (tp->type && (cond)) { \
if (tp->type == TOK822_COMMENT) \
MOVE_COMMENT_AND_CONTINUE(tp, right); \
tp = tp->prev; \
} \
}
/*
 * Characters that the tokenizer turns into single-character operator
 * tokens. The same set makes tok822_quote_atom() retype an atom that
 * contains one of them as a quoted string.
 */
static char tok822_opchar[] = "|%!" LEX_822_SPECIALS;
/* Forward declarations of the file-local helpers. */
static void tok822_quote_atom(TOK822 *);
static const char *tok822_comment(TOK822 *, const char *);
static TOK822 *tok822_group(int, TOK822 *, TOK822 *, int);
static void tok822_copy_quoted(VSTRING *, char *, char *);
static int tok822_append_space(TOK822 *);
/*
 * Parser state bits used by tok822_parse_limit(). DO_WORD is cleared after a
 * word token and set again after an operator; a word seen while it is clear
 * starts a new address. DO_GROUP is set once a ';' has been seen and enables
 * the special handling of ':'.
 */
#define DO_WORD (1<<0)
#define DO_GROUP (1<<1)
/*
 * Values for the sync_type argument of tok822_group(): a non-zero value is
 * the type of an extra token that is inserted, zero inserts nothing.
 */
#define ADD_COMMA ','
#define NO_MISSING_COMMA 0
/*
 * tok822_internalize - append a token list to vp in internal form
 *
 * Token text is copied as stored, without adding quotes or backslashes.
 * Domain literals get their brackets back, a group start is written as ':'
 * and address nodes are expanded recursively. Flags: TOK822_STR_WIPE resets
 * vp first, TOK822_STR_LINE writes a newline after each comma,
 * TOK822_STR_TERM terminates the result. Returns vp.
 */
VSTRING *tok822_internalize(VSTRING *vp, TOK822 *tree, int flags)
{
    TOK822 *node;

    if (flags & TOK822_STR_WIPE) {
        VSTRING_RESET(vp);
    }
    for (node = tree; node != 0; node = node->next) {
        if (node->type == ',') {
            VSTRING_ADDCH(vp, node->type);
            if (flags & TOK822_STR_LINE) {
                /* The newline replaces the optional separating space. */
                VSTRING_ADDCH(vp, '\n');
                continue;
            }
        } else if (node->type == TOK822_ADDR) {
            /* Expand the sub-list in place; no wipe, no terminator. */
            tok822_internalize(vp, node->head, TOK822_STR_NONE);
        } else if (node->type == TOK822_COMMENT
                   || node->type == TOK822_ATOM
                   || node->type == TOK822_QSTRING) {
            vstring_strcat(vp, vstring_str(node->vstr));
        } else if (node->type == TOK822_DOMLIT) {
            VSTRING_ADDCH(vp, '[');
            vstring_strcat(vp, vstring_str(node->vstr));
            VSTRING_ADDCH(vp, ']');
        } else if (node->type == TOK822_STARTGRP) {
            VSTRING_ADDCH(vp, ':');
        } else {
            /* Anything left must be a single-character operator. */
            if (node->type >= TOK822_MINTOK)
                msg_panic("tok822_internalize: unknown operator %d", node->type);
            VSTRING_ADDCH(vp, node->type);
        }
        if (tok822_append_space(node)) {
            VSTRING_ADDCH(vp, ' ');
        }
    }
    if (flags & TOK822_STR_TERM) {
        VSTRING_TERMINATE(vp);
    }
    return (vp);
}
/*
 * strip_address - replace the text after offset start with a bare <address>
 *
 * Logs a warning that shows the text being discarded, truncates vp back to
 * start and appends '<', the quoted form of the token list addr (nothing
 * when addr is null) and '>'.
 */
static void strip_address(VSTRING *vp, ssize_t start, TOK822 *addr)
{
    VSTRING_TERMINATE(vp);
    msg_warn("stripping too many comments from address: %.100s...",
             printable(vstring_str(vp) + start, '?'));
    vstring_truncate(vp, start);

    VSTRING_ADDCH(vp, '<');
    if (addr != 0) {
        VSTRING *plain = vstring_alloc(100);

        tok822_internalize(plain, addr, TOK822_STR_TERM);
        quote_822_local_flags(vp, vstring_str(plain),
                              QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND);
        vstring_free(plain);
    }
    VSTRING_ADDCH(vp, '>');
}
/*
 * tok822_externalize - append a token list to vp in external form
 *
 * Quoted strings and domain literals get their delimiters and backslashes
 * back, and address nodes are passed through quote_822_local_flags(). Flags:
 * TOK822_STR_WIPE resets vp first, TOK822_STR_LINE separates list members
 * with a newline instead of a space, TOK822_STR_TERM terminates the result,
 * and TOK822_STR_TRNC reduces a list member to its bare <address> when the
 * text around the address exceeds MAX_NONADDR_LENGTH. Returns vp.
 */
VSTRING *tok822_externalize(VSTRING *vp, TOK822 *tree, int flags)
{
    TOK822 *node;
    ssize_t start;
    TOK822 *addr;
    ssize_t addr_len;
    int     limit_nonaddr = (flags & TOK822_STR_TRNC);

    /*
     * Bookkeeping for the current list member: start is the offset in vp
     * where the member begins, addr is its address token (if any) and
     * addr_len the number of bytes that address contributed.
     */
#define MAX_NONADDR_LENGTH 250
#define RESET_NONADDR_LENGTH { \
    start = VSTRING_LEN(vp); \
    addr = 0; \
    addr_len = 0; \
    }
#define ENFORCE_NONADDR_LENGTH do { \
    if (addr && VSTRING_LEN(vp) - addr_len > start + MAX_NONADDR_LENGTH) \
        strip_address(vp, start, addr->head); \
    } while(0)

    if (flags & TOK822_STR_WIPE) {
        VSTRING_RESET(vp);
    }
    if (limit_nonaddr) {
        RESET_NONADDR_LENGTH;
    }
    for (node = tree; node != 0; node = node->next) {

        /*
         * A comma ends the current list member. It supplies its own
         * separator, so the generic spacing step below is skipped.
         */
        if (node->type == ',') {
            if (limit_nonaddr) {
                ENFORCE_NONADDR_LENGTH;
            }
            VSTRING_ADDCH(vp, node->type);
            if (flags & TOK822_STR_LINE) {
                VSTRING_ADDCH(vp, '\n');
            } else {
                VSTRING_ADDCH(vp, ' ');
            }
            if (limit_nonaddr) {
                RESET_NONADDR_LENGTH;
            }
            continue;
        }
        if (node->type == TOK822_ADDR) {
            VSTRING *plain;
            ssize_t before;

            addr = node;
            plain = vstring_alloc(100);
            tok822_internalize(plain, node->head, TOK822_STR_TERM);
            before = VSTRING_LEN(vp);
            quote_822_local_flags(vp, vstring_str(plain),
                                  QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND);
            addr_len = VSTRING_LEN(vp) - before;
            vstring_free(plain);
        } else if (node->type == TOK822_ATOM
                   || node->type == TOK822_COMMENT) {
            vstring_strcat(vp, vstring_str(node->vstr));
        } else if (node->type == TOK822_QSTRING) {
            VSTRING_ADDCH(vp, '"');
            tok822_copy_quoted(vp, vstring_str(node->vstr), "\"\\\r\n");
            VSTRING_ADDCH(vp, '"');
        } else if (node->type == TOK822_DOMLIT) {
            VSTRING_ADDCH(vp, '[');
            tok822_copy_quoted(vp, vstring_str(node->vstr), "\\\r\n");
            VSTRING_ADDCH(vp, ']');
        } else if (node->type == TOK822_STARTGRP) {
            VSTRING_ADDCH(vp, ':');
        } else if (node->type == '<') {
            /* An empty <> counts as an address of length zero. */
            if (node->next != 0 && node->next->type == '>') {
                addr = node;
                addr_len = 0;
            }
            VSTRING_ADDCH(vp, '<');
        } else {
            /* Anything left must be a single-character operator. */
            if (node->type >= TOK822_MINTOK)
                msg_panic("tok822_externalize: unknown operator %d", node->type);
            VSTRING_ADDCH(vp, node->type);
        }
        if (tok822_append_space(node)) {
            VSTRING_ADDCH(vp, ' ');
        }
    }
    if (limit_nonaddr) {
        ENFORCE_NONADDR_LENGTH;
    }
    if (flags & TOK822_STR_TERM) {
        VSTRING_TERMINATE(vp);
    }
    return (vp);
}
/*
 * tok822_copy_quoted - append str to vp, writing a backslash in front of
 * every character that occurs in quote_set
 */
static void tok822_copy_quoted(VSTRING *vp, char *str, char *quote_set)
{
    const unsigned char *cp;
    int     ch;

    for (cp = (const unsigned char *) str; (ch = *cp) != 0; cp++) {
        if (strchr(quote_set, ch) != 0) {
            VSTRING_ADDCH(vp, '\\');
        }
        VSTRING_ADDCH(vp, ch);
    }
}
/*
 * tok822_append_space - decide whether a space goes after token tp
 *
 * Returns 0 for a null token, for the last token of a list and for a token
 * that has an owner. Otherwise returns 1 after a comma, after a group start
 * and before '<', and also when tp and its successor are both non-operator
 * tokens.
 */
static int tok822_append_space(TOK822 *tp)
{
    TOK822 *next;

#define NON_OPERATOR(x) \
    (x->type == TOK822_ATOM || x->type == TOK822_QSTRING \
     || x->type == TOK822_COMMENT || x->type == TOK822_DOMLIT \
     || x->type == TOK822_ADDR)

    if (tp == 0)
        return (0);
    next = tp->next;
    if (next == 0 || tp->owner != 0)
        return (0);
    if (tp->type == ',' || tp->type == TOK822_STARTGRP || next->type == '<')
        return (1);
    if (!NON_OPERATOR(tp))
        return (0);
    return (NON_OPERATOR(next));
}
/*
 * tok822_scan_limit - split a string into a flat list of tokens
 *
 * White space between tokens is skipped. "(" starts a comment, "[" a domain
 * literal, a double quote a quoted string, any other character from
 * tok822_opchar becomes an operator token, and everything else is collected
 * as an atom. Scanning stops at the end of the string, or after
 * tok_count_limit tokens when that limit is greater than zero. Returns the
 * first token (null when there is none); when tailp is not null the last
 * token is stored there.
 */
TOK822 *tok822_scan_limit(const char *str, TOK822 **tailp, int tok_count_limit)
{
    TOK822 *first = 0;
    TOK822 *last = 0;
    TOK822 *tok;
    int     c;
    int     ntokens = 0;

    for (;;) {
        c = *(unsigned char *) str++;
        if (c == 0)
            break;
        if (IS_SPACE_TAB_CR_LF(c))
            continue;

        switch (c) {
        case '(':
            tok = tok822_alloc(TOK822_COMMENT, (char *) 0);
            str = tok822_comment(tok, str);
            break;
        case '[':
            tok = tok822_alloc(TOK822_DOMLIT, (char *) 0);
            COLLECT_SKIP_LAST(tok, str, c, c != ']');
            break;
        case '"':
            tok = tok822_alloc(TOK822_QSTRING, (char *) 0);
            COLLECT_SKIP_LAST(tok, str, c, c != '"');
            break;
        default:
            if (c != '\\' && strchr(tok822_opchar, c) != 0) {
                tok = tok822_alloc(c, (char *) 0);
            } else {
                tok = tok822_alloc(TOK822_ATOM, (char *) 0);
                /* Back up: the atom includes the character just read. */
                str -= 1;
                COLLECT(tok, str, c, !IS_SPACE_TAB_CR_LF(c) && !strchr(tok822_opchar, c));
                tok822_quote_atom(tok);
            }
            break;
        }

        if (first != 0) {
            last = tok822_append(last, tok);
        } else {
            for (first = last = tok; last->next != 0; last = last->next)
                 /* void */ ;
        }
        /* The counter only advances when a limit is in effect. */
        if (tok_count_limit > 0 && ++ntokens >= tok_count_limit)
            break;
    }
    if (tailp != 0)
        *tailp = last;
    return (first);
}
/*
 * tok822_parse_limit - tokenize a string and group the tokens into addresses
 *
 * The string is tokenized with tok822_scan_limit() (tok_count_limit is
 * passed through unchanged) and the resulting list is walked from right to
 * left. Runs of tokens are placed under TOK822_ADDR nodes by tok822_group(),
 * which in some cases also inserts a ',' token. Returns the first token of
 * the resulting list, or null when the string contains no tokens.
 */
TOK822 *tok822_parse_limit(const char *str, int tok_count_limit)
{
TOK822 *head;
TOK822 *tail;
TOK822 *right;
TOK822 *first_token;
TOK822 *last_token;
TOK822 *tp;
int state;
/* Nothing to parse when the tokenizer produced no tokens. */
if ((first_token = tok822_scan_limit(str, &last_token, tok_count_limit)) == 0)
return (0);
/*
 * Put a fence token of type 0 at each end of the list. The loops below and
 * the SKIP macros stop at a token whose type is zero, so they cannot walk
 * off the left end.
 */
#define GLUE(left,rite) { left->next = rite; rite->prev = left; }
head = tok822_alloc(0, (char *) 0);
GLUE(head, first_token);
tail = tok822_alloc(0, (char *) 0);
GLUE(last_token, tail);
/*
 * Walk from the last real token towards the head fence. "right" is the
 * right-hand boundary that is handed to tok822_group(); "state" holds the
 * DO_WORD and DO_GROUP bits.
 */
state = DO_WORD;
right = tail;
tp = tail->prev;
while (tp->type) {
if (tp->type == TOK822_COMMENT) {
/* Relocate the comment in front of "right" and go on with the next token. */
MOVE_COMMENT_AND_CONTINUE(tp, right);
} else if (tp->type == ';') {
/* Group what lies between ';' and "right"; from here on ':' is special. */
right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA);
state = DO_GROUP | DO_WORD;
} else if (tp->type == ':' && (state & DO_GROUP) != 0) {
/*
 * A ':' after a ';' was seen: retype it as a group start, group what
 * follows it, then move left to the nearest ',' (or the head fence).
 */
tp->type = TOK822_STARTGRP;
(void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
SKIP(tp, tp->type != ',');
right = tp;
continue;
} else if (tp->type == '>') {
/*
 * Group what follows '>', move left to the nearest '<' while
 * relocating comments, and group what lies between that token and
 * "right". Then move left to the nearest '>', ';', ',' or ':' token
 * (or the head fence); token types above 0xff are never passed to
 * strchr().
 */
right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA);
SKIP_MOVE_COMMENT(tp, tp->type != '<', right);
(void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
SKIP(tp, tp->type > 0xff || strchr(">;,:", tp->type) == 0);
right = tp;
state |= DO_WORD;
continue;
} else if (tp->type == TOK822_ATOM || tp->type == TOK822_QSTRING
|| tp->type == TOK822_DOMLIT) {
/*
 * A word. When DO_WORD is clear the token to the right was a word as
 * well: group what follows this token and insert a ','. tok822_group()
 * returns tp, so "right" becomes the token that now follows tp.
 */
if ((state & DO_WORD) == 0)
right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA)->next;
state &= ~DO_WORD;
} else if (tp->type == ',') {
/* A ',' closes off whatever lies between it and "right". */
right = tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
state |= DO_WORD;
} else {
/* Any other token: the next word continues the current run. */
state |= DO_WORD;
}
tp = tp->prev;
}
/* tp is the head fence here: group whatever is left of "right". */
(void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
/* Remember the result, then detach and free both fence tokens. */
tp = (head->next != tail ? head->next : 0);
tok822_cut_before(head->next);
tok822_free(head);
tok822_cut_before(tail);
tok822_free(tail);
return (tp);
}
/*
 * tok822_quote_atom - retype a token as TOK822_QSTRING when its text holds
 * a space, a control character or a character from tok822_opchar
 */
static void tok822_quote_atom(TOK822 *tp)
{
    const unsigned char *cp = (const unsigned char *) vstring_str(tp->vstr);
    int     ch;

    while ((ch = *cp++) != 0) {
        if (ch == ' ' || ISCNTRL(ch) || strchr(tok822_opchar, ch) != 0) {
            tp->type = TOK822_QSTRING;
            return;
        }
    }
}
/*
 * tok822_comment - collect a comment, including nested comments
 *
 * str points just past the opening '('. The comment is stored in tp->vstr
 * exactly as written, with its parentheses and backslashes. A backslash
 * also stores the character after it, so an escaped parenthesis does not
 * change the nesting depth. Returns the position after the matching ')', or
 * the end of the string when the comment is not closed.
 */
static const char *tok822_comment(TOK822 *tp, const char *str)
{
    int     depth = 1;
    int     c;

    VSTRING_ADDCH(tp->vstr, '(');

    while (depth > 0 && (c = *(const unsigned char *) str) != 0) {
        VSTRING_ADDCH(tp->vstr, c);
        str++;
        if (c == '\\') {
            /* Copy the escaped character too, unless the string ends. */
            if ((c = *(const unsigned char *) str) == 0)
                break;
            VSTRING_ADDCH(tp->vstr, c);
            str++;
        } else if (c == '(') {
            depth++;
        } else if (c == ')') {
            depth--;
        }
    }
    VSTRING_TERMINATE(tp->vstr);
    return (str);
}
/*
 * tok822_group - put the tokens between left and right under a new node
 *
 * When there is at least one token strictly between left and right, those
 * tokens become the sub-list of a new node of type group_type that takes
 * their place in the list. When sync_type is non-zero, a new token of that
 * type is then appended directly after left. Returns left in all cases.
 */
static TOK822 *tok822_group(int group_type, TOK822 *left, TOK822 *right, int sync_type)
{
    TOK822 *members;
    TOK822 *node;

    if (left == right)
        return (left);
    members = left->next;
    if (members == right)
        return (left);

    /* Detach the run of members from both of its neighbours. */
    tok822_cut_before(right);
    tok822_cut_before(members);

    /* Hang the run under a new node and splice the node into the list. */
    node = tok822_alloc(group_type, (char *) 0);
    tok822_sub_append(node, members);
    tok822_append(left, node);
    tok822_append(node, right);

    if (sync_type != 0)
        tok822_append(left, tok822_alloc(sync_type, (char *) 0));
    return (left);
}
/*
 * tok822_scan_addr - tokenize a string and return the tokens as the
 * sub-list of one new TOK822_ADDR node
 */
TOK822 *tok822_scan_addr(const char *addr)
{
    TOK822 *node;

    node = tok822_alloc(TOK822_ADDR, (char *) 0);
    node->head = tok822_scan(addr, &node->tail);
    return (node);
}
#ifdef TEST
#include <unistd.h>
#include <vstream.h>
#include <readlline.h>
/*
 * tok822_print - write a token list to the standard output stream, one
 * token per line; the sub-list of an address node is indented by two more
 * columns
 */
static void tok822_print(TOK822 *list, int indent)
{
    TOK822 *tp;
    const char *label;

    for (tp = list; tp != 0; tp = tp->next) {
        if (tp->type < TOK822_MINTOK) {
            vstream_printf("%*s %s \"%c\"\n", indent, "", "OP", tp->type);
            continue;
        }
        if (tp->type == TOK822_ADDR) {
            vstream_printf("%*s %s\n", indent, "", "address");
            tok822_print(tp->head, indent + 2);
            continue;
        }
        if (tp->type == TOK822_STARTGRP) {
            vstream_printf("%*s %s\n", indent, "", "group \":\"");
            continue;
        }
        /* Tokens that carry text: choose the label, then print the text. */
        if (tp->type == TOK822_COMMENT)
            label = "comment";
        else if (tp->type == TOK822_ATOM)
            label = "atom";
        else if (tp->type == TOK822_QSTRING)
            label = "quoted string";
        else if (tp->type == TOK822_DOMLIT)
            label = "domain literal";
        else if (tp->type == TOK822_ADDR)
            label = "address";
        else
            label = "unknown\n";
        vstream_printf("%*s %s \"%s\"\n", indent, "", label,
                       vstring_str(tp->vstr));
    }
}
/*
 * main - test driver: read lines from the standard input stream, parse each
 * one with a limit of TEST_TOKEN_LIMIT tokens, and print the parse tree plus
 * the internal and external forms
 */
int main(int unused_argc, char **unused_argv)
{
    VSTRING *out = vstring_alloc(100);
    TOK822 *tree;
    VSTRING *line = vstring_alloc(100);

#define TEST_TOKEN_LIMIT 20

    while (readlline(line, VSTREAM_IN, (int *) 0)) {
        /* Remove every trailing newline character. */
        while (VSTRING_LEN(line) > 0 && vstring_end(line)[-1] == '\n') {
            vstring_end(line)[-1] = 0;
            vstring_truncate(line, VSTRING_LEN(line) - 1);
        }
        /* Echo the input when it does not come from a terminal. */
        if (!isatty(vstream_fileno(VSTREAM_IN)))
            vstream_printf(">>>%s<<<\n\n", vstring_str(line));

        tree = tok822_parse_limit(vstring_str(line), TEST_TOKEN_LIMIT);
        vstream_printf("Parse tree:\n");
        tok822_print(tree, 0);
        vstream_printf("\n");

        tok822_internalize(out, tree, TOK822_STR_DEFL);
        vstream_printf("Internalized:\n%s\n\n", vstring_str(out));
        vstream_fflush(VSTREAM_OUT);

        tok822_externalize(out, tree, TOK822_STR_DEFL | TOK822_STR_TRNC);
        vstream_printf("Externalized, no newlines inserted:\n%s\n\n",
                       vstring_str(out));
        vstream_fflush(VSTREAM_OUT);

        tok822_externalize(out, tree,
                           TOK822_STR_DEFL | TOK822_STR_LINE | TOK822_STR_TRNC);
        vstream_printf("Externalized, newlines inserted:\n%s\n\n",
                       vstring_str(out));
        vstream_fflush(VSTREAM_OUT);

        tok822_free_tree(tree);
    }
    vstring_free(out);
    vstring_free(line);
    return (0);
}
#endif