#include "lib.h"
#include "compat.h"
#include "str.h"
#include "str-sanitize.h"
#include "istream.h"
#include "sieve-common.h"
#include "sieve-limits.h"
#include "sieve-error.h"
#include "sieve-script.h"
#include "sieve-lexer.h"
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
/* Numeric value of the decimal digit character `c` ('0'..'9' -> 0..9).
   The argument is parenthesized so that any expression can be passed safely. */
#define DIGIT_VAL(c) ( (c) - '0' )
/*
 * Forward declarations of the lexer's diagnostic helpers; both are defined
 * further down in this file. Each takes a format string followed by variadic
 * arguments and reports at the scanner's current line.
 *
 * NOTE(review): ATTR_FORMAT(2, 3) presumably marks parameter 2 as a
 * printf-style format string with the variadic arguments starting at
 * parameter 3 -- confirm against the macro's definition.
 */
inline static void sieve_lexer_error
(const struct sieve_lexer *lexer, const char *fmt, ...) ATTR_FORMAT(2, 3);
inline static void sieve_lexer_warning
(const struct sieve_lexer *lexer, const char *fmt, ...) ATTR_FORMAT(2, 3);
/*
 * Private scanner state behind a struct sieve_lexer. The public lexer object
 * is embedded in this struct and points back to it through lexer.scanner.
 */
struct sieve_lexical_scanner {
/* Memory pool this struct is allocated from; unreferenced in
   sieve_lexer_free() */
pool_t pool;
/* NOTE(review): not assigned by sieve_lexer_create() in this file */
struct sieve_instance *svinst;
/* Script being scanned; referenced on create, closed and unreferenced on
   free */
struct sieve_script *script;
/* Input stream obtained from sieve_script_open(); the scanner holds its own
   reference */
struct istream *input;
/* Handler that receives the errors and warnings produced while scanning */
struct sieve_error_handler *ehandler;
/* Currently buffered chunk of stream data; NULL means there is no current
   character (sieve_lexer_curchar() then returns -1) */
const unsigned char *buffer;
/* Number of bytes available in `buffer` */
size_t buffer_size;
/* Index of the current character within `buffer` */
size_t buffer_pos;
/* Public lexer state (token type, values, token line) handed to callers */
struct sieve_lexer lexer;
/* Line number of the current character; starts at 1 and is incremented each
   time a '\n' is shifted past */
int current_line;
};
const struct sieve_lexer *sieve_lexer_create
(struct sieve_script *script, struct sieve_error_handler *ehandler,
enum sieve_error *error_r)
{
pool_t pool;
struct sieve_lexical_scanner *scanner;
struct sieve_instance *svinst = sieve_script_svinst(script);
struct istream *stream;
const struct stat *st;
stream = sieve_script_open(script, error_r);
if ( stream == NULL )
return NULL;
st = i_stream_stat(stream, TRUE);
if ( st != NULL && st->st_size > 0 && svinst->max_script_size > 0 &&
(uoff_t)st->st_size > svinst->max_script_size ) {
sieve_error(ehandler, sieve_script_name(script),
"sieve script is too large (max %"PRIuSIZE_T" bytes)",
svinst->max_script_size);
if ( error_r != NULL )
*error_r = SIEVE_ERROR_NOT_POSSIBLE;
return NULL;
}
pool = pool_alloconly_create("sieve_lexer_scanner", 1024);
scanner = p_new(pool, struct sieve_lexical_scanner, 1);
scanner->pool = pool;
scanner->lexer.scanner = scanner;
scanner->ehandler = ehandler;
sieve_error_handler_ref(ehandler);
scanner->input = stream;
i_stream_ref(scanner->input);
scanner->script = script;
sieve_script_ref(script);
scanner->buffer = NULL;
scanner->buffer_size = 0;
scanner->buffer_pos = 0;
scanner->lexer.token_type = STT_NONE;
scanner->lexer.token_str_value = str_new(pool, 256);
scanner->lexer.token_int_value = 0;
scanner->lexer.token_line = 1;
scanner->current_line = 1;
return &scanner->lexer;
}
/*
 * Destroy a lexer created by sieve_lexer_create() and set *lexer to NULL.
 *
 * Drops the scanner's stream reference, closes and unreferences the script,
 * unreferences the error handler and finally unreferences the pool the
 * scanner itself lives in.
 */
void sieve_lexer_free(const struct sieve_lexer **lexer)
{
	struct sieve_lexical_scanner *sc = (*lexer)->scanner;

	i_stream_unref(&sc->input);

	sieve_script_close(sc->script);
	sieve_script_unref(&sc->script);

	sieve_error_handler_unref(&sc->ehandler);

	/* The scanner is allocated from this pool, so this must come last */
	pool_unref(&sc->pool);

	*lexer = NULL;
}
/*
 * Report a lexer error through the scanner's error handler.
 *
 * The message is formatted from `fmt` and the variadic arguments and is
 * attributed to the script location of the scanner's current line.
 */
inline static void sieve_lexer_error
(const struct sieve_lexer *lexer, const char *fmt, ...)
{
	struct sieve_lexical_scanner *sc = lexer->scanner;
	va_list ap;

	va_start(ap, fmt);

	T_BEGIN {
		const char *location =
			sieve_error_script_location(sc->script, sc->current_line);

		sieve_verror(sc->ehandler, location, fmt, ap);
	} T_END;

	va_end(ap);
}
/*
 * Report a lexer warning through the scanner's error handler.
 *
 * The message is formatted from `fmt` and the variadic arguments and is
 * attributed to the script location of the scanner's current line.
 */
inline static void sieve_lexer_warning
(const struct sieve_lexer *lexer, const char *fmt, ...)
{
	struct sieve_lexical_scanner *sc = lexer->scanner;
	va_list ap;

	va_start(ap, fmt);

	T_BEGIN {
		const char *location =
			sieve_error_script_location(sc->script, sc->current_line);

		sieve_vwarning(sc->ehandler, location, fmt, ap);
	} T_END;

	va_end(ap);
}
/*
 * Return a short human-readable description of the lexer's current token
 * type. Token types that have no entry below yield "unknown token (bug)".
 * The returned strings are literals.
 */
const char *sieve_lexer_token_description
(const struct sieve_lexer *lexer)
{
	const char *desc = "unknown token (bug)";

	switch ( lexer->token_type ) {
	case STT_NONE:
		desc = "no token (bug)";
		break;
	case STT_WHITESPACE:
		desc = "whitespace (bug)";
		break;
	case STT_EOF:
		desc = "end of file";
		break;
	case STT_NUMBER:
		desc = "number";
		break;
	case STT_IDENTIFIER:
		desc = "identifier";
		break;
	case STT_TAG:
		desc = "tag";
		break;
	case STT_STRING:
		desc = "string";
		break;
	case STT_RBRACKET:
		desc = "')'";
		break;
	case STT_LBRACKET:
		desc = "'('";
		break;
	case STT_RCURLY:
		desc = "'}'";
		break;
	case STT_LCURLY:
		desc = "'{'";
		break;
	case STT_RSQUARE:
		desc = "']'";
		break;
	case STT_LSQUARE:
		desc = "'['";
		break;
	case STT_SEMICOLON:
		desc = "';'";
		break;
	case STT_COMMA:
		desc = "','";
		break;
	case STT_SLASH:
		desc = "'/'";
		break;
	case STT_COLON:
		desc = "':'";
		break;
	case STT_GARBAGE:
		desc = "unknown characters";
		break;
	case STT_ERROR:
		desc = "error token (bug)";
		break;
	}

	return desc;
}
/*
 * Write a debug representation of the lexer's current token type to stdout.
 * Only the token type is printed, not its value. Some tokens (EOF, curly
 * braces, semicolon) end the output line.
 */
void sieve_lexer_token_print(const struct sieve_lexer *lexer)
{
	const char *text;

	switch ( lexer->token_type ) {
	case STT_NONE:
		text = "??NONE?? ";
		break;
	case STT_WHITESPACE:
		text = "??WHITESPACE?? ";
		break;
	case STT_EOF:
		text = "EOF\n";
		break;
	case STT_NUMBER:
		text = "NUMBER ";
		break;
	case STT_IDENTIFIER:
		text = "IDENTIFIER ";
		break;
	case STT_TAG:
		text = "TAG ";
		break;
	case STT_STRING:
		text = "STRING ";
		break;
	case STT_RBRACKET:
		text = ") ";
		break;
	case STT_LBRACKET:
		text = "( ";
		break;
	case STT_RCURLY:
		text = "}\n";
		break;
	case STT_LCURLY:
		text = "{\n";
		break;
	case STT_RSQUARE:
		text = "] ";
		break;
	case STT_LSQUARE:
		text = "[ ";
		break;
	case STT_SEMICOLON:
		text = ";\n";
		break;
	case STT_COMMA:
		text = ", ";
		break;
	case STT_SLASH:
		text = "/ ";
		break;
	case STT_COLON:
		text = ": ";
		break;
	case STT_GARBAGE:
		text = ">>GARBAGE<<";
		break;
	case STT_ERROR:
		text = ">>ERROR<<";
		break;
	default:
		text = "UNKNOWN ";
		break;
	}

	/* None of the strings contains a conversion, so plain output suffices */
	fputs(text, stdout);
}
/*
 * Advance the scanner to the next input character.
 *
 * Counts a line when the character being left is '\n'. While the buffered
 * chunk still has a following byte only the position moves; otherwise the
 * consumed chunk is skipped on the stream and a new chunk is fetched, reading
 * from the stream if nothing is buffered. When no data can be obtained the
 * buffer stays NULL, which sieve_lexer_curchar() reports as -1.
 *
 * NOTE(review): this relies on i_stream_get_data() returning NULL when no
 * data is buffered -- confirm against the istream API in use.
 */
static void sieve_lexer_shift(struct sieve_lexical_scanner *scanner)
{
	if ( scanner->buffer != NULL ) {
		/* Leaving a newline: the next character is on the next line */
		if ( scanner->buffer[scanner->buffer_pos] == '\n' )
			scanner->current_line++;

		/* Fast path: more data left in the current chunk */
		if ( scanner->buffer_pos + 1 < scanner->buffer_size ) {
			scanner->buffer_pos++;
			return;
		}

		/* Chunk exhausted: mark all of it as consumed */
		i_stream_skip(scanner->input, scanner->buffer_size);
	}

	/* Fetch the next chunk, reading more input when none is buffered */
	scanner->buffer = i_stream_get_data(scanner->input, &scanner->buffer_size);
	if ( scanner->buffer == NULL && i_stream_read(scanner->input) > 0 ) {
		scanner->buffer =
			i_stream_get_data(scanner->input, &scanner->buffer_size);
	}

	scanner->buffer_pos = 0;
}
/*
 * Return the character at the scanner's current position as a non-negative
 * int, or -1 when there is no buffered data (used as the end-of-input marker
 * by the scanning code).
 */
static inline int sieve_lexer_curchar(struct sieve_lexical_scanner *scanner)
{
	return ( scanner->buffer != NULL ?
		(int)scanner->buffer[scanner->buffer_pos] : -1 );
}
/*
 * Format a character for use in an error message: characters in the range
 * 32..126 are shown quoted ('c'), anything else as a hexadecimal value
 * (0xNN). The result comes from t_strdup_printf().
 */
static inline const char *_char_sanitize(int ch)
{
	if ( ch < 32 || ch > 126 )
		return t_strdup_printf("0x%02x", ch);

	return t_strdup_printf("'%c'", ch);
}
/*
 * Scan the rest of a hash comment; the leading '#' has already been shifted
 * past. The comment runs up to and including the next '\n'.
 *
 * Reaching end of input first only produces a warning and still yields
 * STT_WHITESPACE. A NUL character is an error (STT_ERROR, returns FALSE).
 */
static bool sieve_lexer_scan_hash_comment
(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;
	int ch;

	while ( (ch = sieve_lexer_curchar(scanner)) != '\n' ) {
		if ( ch == -1 ) {
			sieve_lexer_warning
				(lexer, "no newline (CRLF) at end of hash comment at end of file");
			lexer->token_type = STT_WHITESPACE;
			return TRUE;
		}
		if ( ch == '\0' ) {
			sieve_lexer_error
				(lexer, "encountered NUL character in hash comment");
			lexer->token_type = STT_ERROR;
			return FALSE;
		}
		sieve_lexer_shift(scanner);
	}

	/* Consume the terminating newline */
	sieve_lexer_shift(scanner);
	lexer->token_type = STT_WHITESPACE;
	return TRUE;
}

/*
 * Scan the rest of a bracket comment; the opening slash-star has already
 * been shifted past. Yields STT_WHITESPACE once the closing star-slash is
 * consumed. End of input or a NUL character inside the comment is an error
 * (STT_ERROR, returns FALSE).
 */
static bool sieve_lexer_scan_bracket_comment
(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;

	for (;;) {
		switch ( sieve_lexer_curchar(scanner) ) {
		case -1:
			sieve_lexer_error(lexer,
				"end of file before end of bracket comment ('/* ... */') "
				"started at line %d", lexer->token_line);
			lexer->token_type = STT_ERROR;
			return FALSE;
		case '*':
			sieve_lexer_shift(scanner);
			if ( sieve_lexer_curchar(scanner) == '/' ) {
				sieve_lexer_shift(scanner);
				lexer->token_type = STT_WHITESPACE;
				return TRUE;
			} else if ( sieve_lexer_curchar(scanner) == -1 ) {
				sieve_lexer_error(lexer,
					"end of file before end of bracket comment ('/* ... */') "
					"started at line %d", lexer->token_line);
				lexer->token_type = STT_ERROR;
				return FALSE;
			}
			/* Not the end: re-examine the current character without
			   shifting, so that a run of stars before the slash works */
			break;
		case '\0':
			sieve_lexer_error(lexer,
				"encountered NUL character in bracket comment");
			lexer->token_type = STT_ERROR;
			return FALSE;
		default:
			sieve_lexer_shift(scanner);
		}
	}
}

/*
 * Scan a quoted string; the current character is the opening '"'. The
 * string's content is collected in lexer->token_str_value with line breaks
 * stored as CRLF, and the token becomes STT_STRING.
 *
 * Errors (STT_ERROR, returns FALSE): end of input or a NUL character inside
 * the string, a CR that is not followed by LF, or content longer than
 * SIEVE_MAX_STRING_LEN. Appending stops once the limit is exceeded, but
 * scanning continues to the closing quote before the error is reported.
 */
static bool sieve_lexer_scan_quoted_string
(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;
	string_t *str = lexer->token_str_value;

	/* Skip the opening quote */
	sieve_lexer_shift(scanner);
	str_truncate(str, 0);

	while ( sieve_lexer_curchar(scanner) != '"' ) {
		/* Backslash escape: drop the backslash. The character after it is
		   handled by the switch below like any other, so an escaped quote
		   is appended instead of ending the string. */
		if ( sieve_lexer_curchar(scanner) == '\\' )
			sieve_lexer_shift(scanner);

		switch ( sieve_lexer_curchar(scanner) ) {
		case -1:
			sieve_lexer_error(lexer,
				"end of file before end of quoted string "
				"started at line %d", lexer->token_line);
			lexer->token_type = STT_ERROR;
			return FALSE;
		case '\0':
			sieve_lexer_error(lexer,
				"encountered NUL character in quoted string "
				"started at line %d", lexer->token_line);
			lexer->token_type = STT_ERROR;
			return FALSE;
		case '\r':
			/* CR is only accepted as part of CRLF */
			sieve_lexer_shift(scanner);
			if ( sieve_lexer_curchar(scanner) != '\n' ) {
				sieve_lexer_error(lexer,
					"found stray carriage-return (CR) character "
					"in quoted string started at line %d", lexer->token_line);
				lexer->token_type = STT_ERROR;
				return FALSE;
			}
			if ( str_len(str) <= SIEVE_MAX_STRING_LEN )
				str_append(str, "\r\n");
			break;
		case '\n':
			/* A bare LF is stored as CRLF */
			if ( str_len(str) <= SIEVE_MAX_STRING_LEN )
				str_append(str, "\r\n");
			break;
		default:
			if ( str_len(str) <= SIEVE_MAX_STRING_LEN )
				str_append_c(str, sieve_lexer_curchar(scanner));
		}
		sieve_lexer_shift(scanner);
	}

	/* Skip the closing quote */
	sieve_lexer_shift(scanner);

	if ( str_len(str) > SIEVE_MAX_STRING_LEN ) {
		sieve_lexer_error(lexer,
			"quoted string started at line %d is too long "
			"(longer than %llu bytes)", lexer->token_line,
			(unsigned long long) SIEVE_MAX_STRING_LEN);
		lexer->token_type = STT_ERROR;
		return FALSE;
	}

	lexer->token_type = STT_STRING;
	return TRUE;
}

/*
 * Scan a number; the current character is its first decimal digit. An
 * optional K/M/G suffix (either case) multiplies the value by 2^10, 2^20 or
 * 2^30. On success the token becomes STT_NUMBER and the value is stored in
 * lexer->token_int_value.
 *
 * A value that would exceed SIEVE_MAX_NUMBER is an error (STT_ERROR, returns
 * FALSE). The bound is checked before each multiplication, so the
 * accumulator never wraps; the remaining digits and the suffix are still
 * consumed.
 */
static bool sieve_lexer_scan_number(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;
	sieve_number_t value = 0;
	bool overflow = FALSE;

	for (;;) {
		int ch = sieve_lexer_curchar(scanner);
		sieve_number_t digit;

		if ( !i_isdigit(ch) )
			break;

		digit = DIGIT_VAL(ch);
		if ( !overflow ) {
			/* value * 10 + digit <= MAX  <=>  value <= (MAX - digit) / 10 */
			if ( value > (SIEVE_MAX_NUMBER - digit) / 10 )
				overflow = TRUE;
			else
				value = value * 10 + digit;
		}
		sieve_lexer_shift(scanner);
	}

	/* Optional size suffix */
	switch ( sieve_lexer_curchar(scanner) ) {
	case 'k':
	case 'K':
		if ( value > (SIEVE_MAX_NUMBER >> 10) )
			overflow = TRUE;
		else
			value = value << 10;
		sieve_lexer_shift(scanner);
		break;
	case 'm':
	case 'M':
		if ( value > (SIEVE_MAX_NUMBER >> 20) )
			overflow = TRUE;
		else
			value = value << 20;
		sieve_lexer_shift(scanner);
		break;
	case 'g':
	case 'G':
		if ( value > (SIEVE_MAX_NUMBER >> 30) )
			overflow = TRUE;
		else
			value = value << 30;
		sieve_lexer_shift(scanner);
		break;
	default:
		break;
	}

	if ( overflow ) {
		sieve_lexer_error(lexer,
			"number exceeds integer limits (max %llu)",
			(unsigned long long) SIEVE_MAX_NUMBER);
		lexer->token_type = STT_ERROR;
		return FALSE;
	}

	lexer->token_type = STT_NUMBER;
	lexer->token_int_value = value;
	return TRUE;
}

/*
 * Scan a multi-line string; the identifier "text" has been scanned and the
 * current character is the ':' that follows it. After the colon, optional
 * spaces/tabs and an optional hash comment, a line break must follow. The
 * content then runs until a line consisting of a single '.'.
 *
 * Content is collected in lexer->token_str_value with every line ending
 * stored as CRLF. A line starting with '.' that is not the terminator keeps
 * that dot, and a second dot directly after it is dropped (dot-stuffing).
 * On success the token becomes STT_STRING.
 *
 * Errors (STT_ERROR, returns FALSE): end of input before the terminator, an
 * unexpected character after "text:", a NUL character, a CR that is not
 * followed by LF, or content longer than SIEVE_MAX_STRING_LEN.
 */
static bool sieve_lexer_scan_multiline_string
(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;
	string_t *str = lexer->token_str_value;

	/* Skip the ':' and any spaces or tabs behind it */
	sieve_lexer_shift(scanner);
	while ( sieve_lexer_curchar(scanner) == ' ' ||
		sieve_lexer_curchar(scanner) == '\t' )
		sieve_lexer_shift(scanner);

	switch ( sieve_lexer_curchar(scanner) ) {
	case '#':
		/* Skip the comment up to the newline. End of input must stop the
		   loop as well: the current character stays -1 there, so without
		   this check the loop would never terminate. */
		while ( sieve_lexer_curchar(scanner) != '\n' &&
			sieve_lexer_curchar(scanner) != -1 )
			sieve_lexer_shift(scanner);
		break;
	case '\r':
		sieve_lexer_shift(scanner);
		break;
	}

	/* The first line must end here */
	switch ( sieve_lexer_curchar(scanner) ) {
	case '\n':
		sieve_lexer_shift(scanner);
		break;
	case -1:
		sieve_lexer_error(lexer,
			"end of file before end of multi-line string");
		lexer->token_type = STT_ERROR;
		return FALSE;
	default:
		sieve_lexer_error(lexer,
			"invalid character %s after 'text:' in multiline string",
			_char_sanitize(sieve_lexer_curchar(scanner)));
		lexer->token_type = STT_ERROR;
		return FALSE;
	}

	/* Discard the "text" identifier collected so far */
	str_truncate(str, 0);

	/* Each iteration handles one content line */
	for (;;) {
		bool cr_shifted = FALSE;

		/* A leading dot is either the terminator or dot-stuffing */
		if ( sieve_lexer_curchar(scanner) == '.' ) {
			sieve_lexer_shift(scanner);

			if ( sieve_lexer_curchar(scanner) == '\r' ) {
				sieve_lexer_shift(scanner);
				cr_shifted = TRUE;
			}

			if ( sieve_lexer_curchar(scanner) == '\n' ) {
				/* End of the multi-line string */
				sieve_lexer_shift(scanner);

				if ( str_len(str) > SIEVE_MAX_STRING_LEN ) {
					sieve_lexer_error(lexer,
						"multi-line string started at line %d is too long "
						"(longer than %llu bytes)", lexer->token_line,
						(unsigned long long) SIEVE_MAX_STRING_LEN);
					lexer->token_type = STT_ERROR;
					return FALSE;
				}

				lexer->token_type = STT_STRING;
				return TRUE;
			} else if ( cr_shifted ) {
				sieve_lexer_error(lexer,
					"found stray carriage-return (CR) character "
					"in multi-line string started at line %d", lexer->token_line);
				lexer->token_type = STT_ERROR;
				return FALSE;
			}

			/* Dot-stuffing: keep one dot, drop a directly following one */
			if ( str_len(str) <= SIEVE_MAX_STRING_LEN )
				str_append_c(str, '.');
			if ( sieve_lexer_curchar(scanner) == '.' )
				sieve_lexer_shift(scanner);
		}

		/* Collect the rest of the line */
		while ( sieve_lexer_curchar(scanner) != '\n' &&
			sieve_lexer_curchar(scanner) != '\r' ) {
			switch ( sieve_lexer_curchar(scanner) ) {
			case -1:
				sieve_lexer_error(lexer,
					"end of file before end of multi-line string");
				lexer->token_type = STT_ERROR;
				return FALSE;
			case '\0':
				sieve_lexer_error(lexer,
					"encountered NUL character in multi-line string "
					"started at line %d", lexer->token_line);
				lexer->token_type = STT_ERROR;
				return FALSE;
			default:
				if ( str_len(str) <= SIEVE_MAX_STRING_LEN )
					str_append_c(str, sieve_lexer_curchar(scanner));
			}
			sieve_lexer_shift(scanner);
		}

		/* Line ending: LF or CRLF, stored as CRLF */
		if ( sieve_lexer_curchar(scanner) == '\r' )
			sieve_lexer_shift(scanner);

		if ( sieve_lexer_curchar(scanner) != '\n' ) {
			sieve_lexer_error(lexer,
				"found stray carriage-return (CR) character "
				"in multi-line string started at line %d", lexer->token_line);
			lexer->token_type = STT_ERROR;
			return FALSE;
		}

		if ( str_len(str) <= SIEVE_MAX_STRING_LEN )
			str_append(str, "\r\n");

		sieve_lexer_shift(scanner);
	}
}

/*
 * Scan an identifier, a tag, a lone colon or a multi-line string; the
 * current character is a letter, '_' or ':'.
 *
 *  - ':' followed by a letter or '_' starts a tag (STT_TAG); the name is
 *    stored without the colon.
 *  - ':' followed by anything else yields STT_COLON.
 *  - The identifier "text" (any case) directly followed by ':' starts a
 *    multi-line string.
 *  - Anything else is an identifier (STT_IDENTIFIER).
 *
 * The name is collected in lexer->token_str_value. A name longer than
 * SIEVE_MAX_IDENTIFIER_LEN is an error (STT_ERROR, returns FALSE).
 */
static bool sieve_lexer_scan_identifier(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;
	enum sieve_token_type type = STT_IDENTIFIER;
	string_t *str = lexer->token_str_value;

	str_truncate(str, 0);

	if ( sieve_lexer_curchar(scanner) == ':' ) {
		sieve_lexer_shift(scanner);
		type = STT_TAG;

		if ( !(i_isalpha(sieve_lexer_curchar(scanner)) ||
			sieve_lexer_curchar(scanner) == '_') ) {
			/* Just a colon, not a tag */
			lexer->token_type = STT_COLON;
			return TRUE;
		}
	}

	/* First character of the name */
	str_append_c(str, sieve_lexer_curchar(scanner));
	sieve_lexer_shift(scanner);

	/* Remaining characters; the whole name is consumed, but appending
	   stops once the length limit has been exceeded */
	while ( i_isalnum(sieve_lexer_curchar(scanner)) ||
		sieve_lexer_curchar(scanner) == '_' ) {
		if ( str_len(str) <= SIEVE_MAX_IDENTIFIER_LEN )
			str_append_c(str, sieve_lexer_curchar(scanner));
		sieve_lexer_shift(scanner);
	}

	/* "text:" introduces a multi-line string */
	if ( sieve_lexer_curchar(scanner) == ':' &&
		type == STT_IDENTIFIER && str_len(str) == 4 &&
		strncasecmp(str_c(str), "text", 4) == 0 )
		return sieve_lexer_scan_multiline_string(scanner);

	if ( str_len(str) > SIEVE_MAX_IDENTIFIER_LEN ) {
		sieve_lexer_error(lexer,
			"encountered impossibly long %s%s'",
			(type == STT_TAG ? "tag identifier ':" : "identifier '"),
			str_sanitize(str_c(str), SIEVE_MAX_IDENTIFIER_LEN));
		lexer->token_type = STT_ERROR;
		return FALSE;
	}

	lexer->token_type = type;
	return TRUE;
}

/*
 * Scan the next raw token from the input, including whitespace and comments
 * (both reported as STT_WHITESPACE).
 *
 * On the very first call (token type STT_NONE) the stream is read and the
 * first character is loaded. lexer->token_line is set to the line on which
 * the token starts. The result is stored in lexer->token_type, with string
 * and number values in lexer->token_str_value / lexer->token_int_value.
 *
 * Returns TRUE when a token was recognized (end of input yields STT_EOF and
 * also returns TRUE). Returns FALSE with STT_ERROR after a reported lexical
 * error, or with STT_GARBAGE for an unexpected character; for a run of
 * consecutive garbage characters only the first one is reported.
 */
static bool sieve_lexer_scan_raw_token(struct sieve_lexical_scanner *scanner)
{
	struct sieve_lexer *lexer = &scanner->lexer;
	int ch;

	/* Load the first character before the first token is scanned */
	if ( lexer->token_type == STT_NONE ) {
		i_stream_read(scanner->input);
		sieve_lexer_shift(scanner);
	}

	lexer->token_line = scanner->current_line;

	ch = sieve_lexer_curchar(scanner);
	switch ( ch ) {

	/* Hash comment */
	case '#':
		sieve_lexer_shift(scanner);
		return sieve_lexer_scan_hash_comment(scanner);

	/* Bracket comment, or a plain slash */
	case '/':
		sieve_lexer_shift(scanner);
		if ( sieve_lexer_curchar(scanner) == '*' ) {
			sieve_lexer_shift(scanner);
			return sieve_lexer_scan_bracket_comment(scanner);
		}
		lexer->token_type = STT_SLASH;
		return TRUE;

	/* Whitespace run */
	case '\t':
	case '\r':
	case '\n':
	case ' ':
		sieve_lexer_shift(scanner);
		while ( sieve_lexer_curchar(scanner) == '\t' ||
			sieve_lexer_curchar(scanner) == '\r' ||
			sieve_lexer_curchar(scanner) == '\n' ||
			sieve_lexer_curchar(scanner) == ' ' ) {
			sieve_lexer_shift(scanner);
		}
		lexer->token_type = STT_WHITESPACE;
		return TRUE;

	/* Quoted string */
	case '"':
		return sieve_lexer_scan_quoted_string(scanner);

	/* Single-character tokens */
	case ']':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_RSQUARE;
		return TRUE;
	case '[':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_LSQUARE;
		return TRUE;
	case '}':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_RCURLY;
		return TRUE;
	case '{':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_LCURLY;
		return TRUE;
	case ')':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_RBRACKET;
		return TRUE;
	case '(':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_LBRACKET;
		return TRUE;
	case ';':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_SEMICOLON;
		return TRUE;
	case ',':
		sieve_lexer_shift(scanner);
		lexer->token_type = STT_COMMA;
		return TRUE;

	/* End of input */
	case -1:
		lexer->token_type = STT_EOF;
		return TRUE;

	default:
		break;
	}

	if ( i_isdigit(ch) )
		return sieve_lexer_scan_number(scanner);

	if ( i_isalpha(ch) || ch == '_' || ch == ':' )
		return sieve_lexer_scan_identifier(scanner);

	/* Unexpected character: report only the start of a garbage run */
	if ( lexer->token_type != STT_GARBAGE )
		sieve_lexer_error(lexer, "unexpected character(s) starting with %s",
			_char_sanitize(ch));
	sieve_lexer_shift(scanner);
	lexer->token_type = STT_GARBAGE;
	return FALSE;
}
/*
 * Advance the lexer to the next token that is not whitespace.
 *
 * Raw tokens are scanned repeatedly; STT_WHITESPACE tokens (which include
 * comments) are skipped. Returns TRUE once a non-whitespace token is current,
 * or FALSE as soon as scanning a raw token fails.
 */
bool sieve_lexer_skip_token(const struct sieve_lexer *lexer)
{
	for (;;) {
		if ( !sieve_lexer_scan_raw_token(lexer->scanner) )
			return FALSE;

		if ( lexer->token_type != STT_WHITESPACE )
			return TRUE;
	}
}