#include "lib.h"
#include "str.h"
#include "strescape.h"
#include "rfc822-parser.h"
/*
 * Per-byte classification table, indexed by the unsigned value of a byte.
 *
 *   0 - bytes that are never accepted: control characters, SP, DEL and
 *       the punctuation " ( ) , . : ; < > @ [ \ ]
 *   1 - the remaining accepted ASCII punctuation
 *   2 - ASCII digits and letters, and every byte 0x80-0xff
 *   4 - only '/', '=' and '?'
 *
 * NOTE(review): the lookups happen through the IS_ATEXT() and
 * IS_ATEXT_NON_TSPECIAL() macros, which are not defined in this file
 * (presumably rfc822-parser.h) - confirm there how the 1/2/4 values are
 * tested before changing any entry.
 */
unsigned char rfc822_atext_chars[256] = {
/* 0x00-0x1f: control characters */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x20-0x2f: SP and punctuation up to '/' */
0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 4,
/* 0x30-0x3f: digits 0-9, then : ; < = > ? */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 4, 0, 4,
/* 0x40-0x4f: @, then letters A-O */
0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0x50-0x5f: letters P-Z, then [ backslash ] ^ _ */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1,
/* 0x60-0x6f: backquote, then letters a-o */
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0x70-0x7f: letters p-z, then { | } ~ and DEL */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0,
/* 0x80-0xff: all 8-bit bytes share the same class as letters/digits */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/*
 * Prepare a parser context for the byte range [data, data + size).
 *
 * The whole context is zeroed first, so any field not assigned here starts
 * out as zero. last_comment is stored as given; when it is NULL the comment
 * skipper does not capture comment text.
 */
void rfc822_parser_init(struct rfc822_parser_context *ctx,
			const unsigned char *data, size_t size,
			string_t *last_comment)
{
	const unsigned char *const data_end = data + size;

	memset(ctx, 0, sizeof(*ctx));

	ctx->last_comment = last_comment;
	ctx->end = data_end;
	ctx->data = data;
}
/*
 * Skip a parenthesised comment. ctx->data must point at the opening '('.
 *
 * Nested parentheses are counted, and a backslash makes the following byte
 * literal (so an escaped parenthesis does not change the nesting depth).
 * If ctx->last_comment is set it is emptied first and then receives the
 * comment text without the outermost parentheses and without the escaping
 * backslashes.
 *
 * Returns:
 *    1  comment closed and more input follows
 *    0  comment closed exactly at the end of input
 *   -1  input ended before the comment was closed (last_comment may then
 *       hold only the text that preceded the last backslash)
 */
int rfc822_skip_comment(struct rfc822_parser_context *ctx)
{
	const unsigned char *seg;
	int depth = 1;

	i_assert(*ctx->data == '(');

	if (ctx->last_comment != NULL)
		str_truncate(ctx->last_comment, 0);

	ctx->data++;
	seg = ctx->data;

	while (ctx->data != ctx->end) {
		const unsigned char c = *ctx->data;

		if (c == '(') {
			depth++;
		} else if (c == ')') {
			depth--;
			if (depth == 0) {
				if (ctx->last_comment != NULL) {
					str_append_n(ctx->last_comment, seg,
						     ctx->data - seg);
				}
				ctx->data++;
				return ctx->data != ctx->end;
			}
		} else if (c == '\\') {
			/* Flush what precedes the backslash; the next
			   segment starts at the escaped byte so the
			   backslash itself is dropped. */
			if (ctx->last_comment != NULL) {
				str_append_n(ctx->last_comment, seg,
					     ctx->data - seg);
			}
			ctx->data++;
			seg = ctx->data;
			if (ctx->data == ctx->end)
				return -1;
		}
		/* Advance past the current (or escaped) byte. */
		ctx->data++;
	}
	return -1;
}
/*
 * Skip any run of linear whitespace (SP, TAB, CR, LF) and parenthesised
 * comments at the current position.
 *
 * Returns:
 *    1  stopped at a byte that is neither whitespace nor a comment start
 *    0  reached the end of input
 *   -1  a comment was not terminated
 */
int rfc822_skip_lwsp(struct rfc822_parser_context *ctx)
{
	while (ctx->data != ctx->end) {
		switch (*ctx->data) {
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			ctx->data++;
			break;
		case '(':
			if (rfc822_skip_comment(ctx) < 0)
				return -1;
			break;
		default:
			/* Something other than LWSP: input remains. */
			return 1;
		}
	}
	return 0;
}
/*
 * Parse one atom (a non-empty run of IS_ATEXT bytes) and append it to str.
 *
 * Returns -1 without consuming anything when the input is empty or does not
 * start with an atext byte. Returns 0 when the atom runs to the end of
 * input. Otherwise trailing whitespace/comments are skipped and the result
 * of rfc822_skip_lwsp() is returned.
 */
int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *first;

	if (ctx->data == ctx->end)
		return -1;
	if (!IS_ATEXT(*ctx->data))
		return -1;

	first = ctx->data;
	ctx->data++;
	while (ctx->data != ctx->end && IS_ATEXT(*ctx->data))
		ctx->data++;

	str_append_n(str, first, ctx->data - first);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/*
 * Parse a dot-atom: one or more atext runs separated by '.', e.g.
 * "example.com". Whitespace and comments are tolerated on either side of
 * each '.' and are not copied; the components and the dots are appended
 * to str.
 *
 * Every '.' must be followed by at least one atext byte. The byte after
 * the dot is validated like any other, so input such as "a.@b" or a
 * trailing "a." is rejected rather than silently absorbed.
 *
 * Returns:
 *    1  dot-atom parsed; ctx->data points at the next non-LWSP byte
 *    0  dot-atom parsed and the end of input was reached
 *   -1  input is empty or does not start with atext, a '.' is not followed
 *       by atext, or a comment is unterminated. str may already contain
 *       the components parsed before the error.
 */
int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *start;
	int ret;

	if (ctx->data == ctx->end || !IS_ATEXT(*ctx->data))
		return -1;

	/* No increment in the for-header: after a '.', the first byte of the
	   next component must go through the IS_ATEXT() check too. */
	for (start = ctx->data++; ctx->data != ctx->end; ) {
		if (IS_ATEXT(*ctx->data)) {
			ctx->data++;
			continue;
		}

		/* Empty component: the '.' was not followed by atext. */
		if (start == ctx->data)
			return -1;
		str_append_n(str, start, ctx->data - start);

		if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
			return ret;

		if (*ctx->data != '.')
			return 1;

		ctx->data++;
		str_append_c(str, '.');

		/* Running out of input (or hitting a broken comment) right
		   after a '.' leaves the dot-atom incomplete. */
		if (rfc822_skip_lwsp(ctx) <= 0)
			return -1;
		start = ctx->data;
	}

	/* The loop only ends here while inside a non-empty component. */
	str_append_n(str, start, ctx->data - start);
	return 0;
}
/*
 * Append to str the longest run of bytes that satisfy
 * IS_ATEXT_NON_TSPECIAL() or are '.'. The run may be empty, in which case
 * nothing is added to str.
 *
 * Returns 0 when the run extends to the end of input; otherwise skips
 * trailing whitespace/comments and returns the rfc822_skip_lwsp() result.
 */
int rfc822_parse_mime_token(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const first = ctx->data;

	while (ctx->data != ctx->end &&
	       (IS_ATEXT_NON_TSPECIAL(*ctx->data) || *ctx->data == '.'))
		ctx->data++;

	str_append_n(str, first, ctx->data - first);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/*
 * Parse a quoted string. ctx->data must point at the opening '"'.
 *
 * The content between the quotes is appended to str with these rules:
 *  - the surrounding quotes are not copied
 *  - line breaks (LF, or CR immediately followed by LF) are removed
 *  - backslash escapes are copied verbatim, backslash included, so the
 *    caller has to unescape the result if it wants the literal text
 *
 * Returns the rfc822_skip_lwsp() result after the closing quote, or -1 if
 * the input ends before the string is closed (str may then contain a
 * partial value).
 */
int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *seg;

	i_assert(*ctx->data == '"');
	ctx->data++;
	seg = ctx->data;

	while (ctx->data != ctx->end) {
		const unsigned char c = *ctx->data;

		if (c == '"') {
			str_append_n(str, seg, ctx->data - seg);
			ctx->data++;
			return rfc822_skip_lwsp(ctx);
		}

		if (c == '\n') {
			/* Drop the line break, and the CR before it if the
			   current segment ends with one. */
			size_t n = ctx->data - seg;

			if (n > 0 && seg[n - 1] == '\r')
				n--;
			str_append_n(str, seg, n);
			seg = ctx->data + 1;
		} else if (c == '\\') {
			ctx->data++;
			if (ctx->data == ctx->end)
				return -1;
			/* Flush up to and including the backslash; the
			   escaped byte begins the next segment so it is
			   never interpreted as a quote or line break. */
			str_append_n(str, seg, ctx->data - seg);
			seg = ctx->data;
		}
		ctx->data++;
	}
	return -1;
}
/*
 * Append to str the longest run of bytes that are IS_ATEXT or '.'. Unlike
 * rfc822_parse_atom() the run may be empty and dots are accepted anywhere.
 *
 * Returns 0 when the run extends to the end of input; otherwise skips
 * trailing whitespace/comments and returns the rfc822_skip_lwsp() result.
 */
static int
rfc822_parse_atom_or_dot(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const first = ctx->data;

	while (ctx->data != ctx->end &&
	       (IS_ATEXT(*ctx->data) || *ctx->data == '.'))
		ctx->data++;

	str_append_n(str, first, ctx->data - first);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/*
 * Parse a phrase: a sequence of words, each either a quoted string or a
 * run of atext/'.' bytes. Words are appended to str separated by a single
 * space; quoted strings are appended as rfc822_parse_quoted_string()
 * produces them.
 *
 * Returns 0 immediately on empty input and -1 if the phrase would start
 * with '.'. While parsing, any word parser result <= 0 (end of input or
 * error) is returned as is. Otherwise the phrase ends at the first byte
 * that cannot start another word and the rfc822_skip_lwsp() result is
 * returned.
 */
int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str)
{
	int ret;

	if (ctx->data == ctx->end)
		return 0;
	if (*ctx->data == '.')
		return -1;

	for (;;) {
		if (*ctx->data != '"')
			ret = rfc822_parse_atom_or_dot(ctx, str);
		else
			ret = rfc822_parse_quoted_string(ctx, str);
		if (ret <= 0)
			return ret;

		/* ret > 0 guarantees there is a current byte to inspect. */
		if (IS_ATEXT(*ctx->data) || *ctx->data == '"' ||
		    *ctx->data == '.') {
			/* Another word follows. */
			str_append_c(str, ' ');
			continue;
		}
		return rfc822_skip_lwsp(ctx);
	}
}
/*
 * Parse a bracketed domain literal such as "[127.0.0.1]". ctx->data must
 * point at the opening '['.
 *
 * The literal is appended to str verbatim, including both brackets and any
 * backslashes. A backslash makes the following byte literal, so an escaped
 * ']' does not terminate the literal.
 *
 * Returns the rfc822_skip_lwsp() result after the closing ']', or -1 if
 * the input ends first (nothing is appended to str in that case).
 */
static int
rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const literal = ctx->data;

	i_assert(*ctx->data == '[');

	while (ctx->data != ctx->end) {
		switch (*ctx->data) {
		case ']':
			ctx->data++;
			str_append_n(str, literal, ctx->data - literal);
			return rfc822_skip_lwsp(ctx);
		case '\\':
			/* Step onto the escaped byte; the common increment
			   below then moves past it. */
			ctx->data++;
			if (ctx->data == ctx->end)
				return -1;
			break;
		default:
			break;
		}
		ctx->data++;
	}
	return -1;
}
/*
 * Parse the domain part that follows '@'. ctx->data must point at the '@'.
 *
 * After the '@' and any whitespace/comments, a '[' selects the domain
 * literal parser and anything else is handed to rfc822_parse_dot_atom();
 * that parser's result is returned. Returns -1 if nothing follows the '@'
 * or a comment after it is unterminated.
 */
int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
{
	i_assert(*ctx->data == '@');
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	return *ctx->data == '[' ?
		rfc822_parse_domain_literal(ctx, str) :
		rfc822_parse_dot_atom(ctx, str);
}
/*
 * Parse a "type/subtype" value and append it to str as "type/subtype",
 * with any whitespace/comments around the '/' removed.
 *
 * Returns -1 if the input is empty, ends before the subtype, has no '/'
 * after the type, or contains an unterminated comment. Otherwise returns
 * the result of parsing the subtype token: 0 if the input ended there,
 * 1 if more input (for example parameters) follows.
 */
int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str)
{
	/* Leading LWSP, the type token, then the mandatory '/'. The checks
	   run left to right and stop at the first failure. */
	if (rfc822_skip_lwsp(ctx) <= 0 ||
	    rfc822_parse_mime_token(ctx, str) <= 0 ||
	    *ctx->data != '/')
		return -1;
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	str_append_c(str, '/');
	return rfc822_parse_mime_token(ctx, str);
}
/*
 * Parse one ";key=value" parameter at the current position.
 *
 * The key is a MIME token. The value is a quoted string (returned
 * unescaped), a MIME token, or - when it begins with '=' - the raw bytes up
 * to the next ';' or whitespace.
 * NOTE(review): the leading-'=' case presumably tolerates unquoted
 * encoded-words such as name==?utf-8?...?= - confirm the intent.
 *
 * Both strings live in one buffer obtained from t_str_new(): the key first,
 * a NUL, then the value. Their lifetime is therefore that of the
 * t_str_new() allocation.
 *
 * Returns:
 *    1  a parameter was parsed; *key_r and *value_r are set. The value is
 *       empty if the input ended right after '='.
 *    0  already at the end of input; *key_r and *value_r are NULL
 *   -1  syntax error. *key_r and *value_r are NULL when the error precedes
 *       the value (no ';', no key, no '='); they are set if the value
 *       itself was broken (unterminated quoted string or comment).
 */
int rfc822_parse_content_param(struct rfc822_parser_context *ctx,
			       const char **key_r, const char **value_r)
{
	string_t *buf;
	size_t value_offset;
	int ret;

	*key_r = NULL;
	*value_r = NULL;

	if (ctx->data == ctx->end)
		return 0;
	if (*ctx->data != ';')
		return -1;
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	buf = t_str_new(64);
	if (rfc822_parse_mime_token(ctx, buf) <= 0)
		return -1;
	/* Terminate the key in place; the value is written right after. */
	str_append_c(buf, '\0');
	value_offset = str_len(buf);

	if (*ctx->data != '=')
		return -1;
	ctx->data++;

	ret = rfc822_skip_lwsp(ctx);
	if (ret > 0) {
		if (*ctx->data == '"') {
			ret = rfc822_parse_quoted_string(ctx, buf);
			/* The quoted-string parser keeps the backslashes. */
			str_unescape(str_c_modifiable(buf) + value_offset);
		} else if (ctx->data != ctx->end && *ctx->data == '=') {
			for (; ctx->data != ctx->end; ctx->data++) {
				const unsigned char c = *ctx->data;

				if (c == ';' || c == ' ' || c == '\t' ||
				    c == '\r' || c == '\n')
					break;
				str_append_c(buf, c);
			}
		} else {
			ret = rfc822_parse_mime_token(ctx, buf);
		}
	}
	/* ret <= 0 here means the value is missing or broken; the pointers
	   are still published and only ret < 0 is reported as an error. */

	*key_r = str_c(buf);
	*value_r = *key_r + value_offset;
	return ret < 0 ? -1 : 1;
}