#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "message.h"
#include "xgettext.h"
#include "x-elisp.h"
#include "error.h"
#include "xalloc.h"
#include "exit.h"
#include "hash.h"
#include "c-ctype.h"
#include "gettext.h"
/* Shorthand used in this file to mark and translate user-visible messages.  */
#define _(s) gettext(s)
/* When true, read_object records every string literal it reads as a
   message, regardless of the surrounding form.  Set by
   x_elisp_extract_all.  */
static bool extract_all = false;
/* Keyword table filled by x_elisp_keyword and queried by read_object.
   Each value packs the two argument positions as
   argnum1 + (argnum2 << 10).  */
static hash_table keywords;
/* True until the default keyword set has either been installed by
   init_keywords or been disabled by x_elisp_keyword (NULL).  */
static bool default_keywords = true;
/* Request that every string literal be extracted, not only those that
   appear as arguments of a registered keyword.  */
void
x_elisp_extract_all ()
{
  extract_all = true;
}
/* Register one keyword specification.
   NAME == NULL means "do not install the default keywords".
   Otherwise NAME is handed to split_keywordspec, which yields the end of the
   symbol part and up to two argument positions.  The symbol is stored in the
   keyword table with the positions packed as argnum1 + (argnum2 << 10);
   a missing first position defaults to 1.  Specifications whose symbol part
   contains a ':' are silently ignored.  */
void
x_elisp_keyword (const char *name)
{
  const char *symbol_end;
  int first_pos;
  int second_pos;
  const char *first_colon;

  if (name == NULL)
    {
      default_keywords = false;
      return;
    }

  /* The table is created lazily on the first registration.  */
  if (keywords.table == NULL)
    init_hash (&keywords, 100);

  split_keywordspec (name, &symbol_end, &first_pos, &second_pos);

  /* A colon before SYMBOL_END would be part of the symbol itself; such a
     specification is not recorded.  */
  first_colon = strchr (name, ':');
  if (first_colon != NULL && first_colon < symbol_end)
    return;

  if (first_pos == 0)
    first_pos = 1;
  insert_entry (&keywords, name, symbol_end - name,
                (void *) (long) (first_pos + (second_pos << 10)));
}
static void
init_keywords ()
{
if (default_keywords)
{
x_elisp_keyword ("_");
default_keywords = false;
}
}
/* Register the format-string flag specifications known for Emacs Lisp
   with xgettext.  */
void
init_flag_table_elisp ()
{
  static const char *const flag_specs[] =
    {
      "_:1:pass-elisp-format",
      "format:1:elisp-format"
    };
  size_t i;

  for (i = 0; i < sizeof flag_specs / sizeof flag_specs[0]; i++)
    xgettext_record_flag (flag_specs[i]);
}
/* Name of the file being read, used in the read-error message of do_getc.  */
static const char *real_file_name;
/* File name stored in the position of every message recorded by
   read_object.  Set from a fresh copy in extract_elisp.  */
static char *logical_file_name;
/* Current line number; do_getc increments it on '\n' and do_ungetc
   decrements it when a '\n' is pushed back.  */
static int line_number;
/* The stream being read by do_getc / do_ungetc.  */
static FILE *fp;
static int
do_getc ()
{
int c = getc (fp);
if (c == EOF)
{
if (ferror (fp))
error (EXIT_FAILURE, errno, _("\
error while reading \"%s\""), real_file_name);
}
else if (c == '\n')
line_number++;
return c;
}
/* Push the character C back onto the input stream, undoing the line
   count adjustment that do_getc made if C is a newline.  */
static void
do_ungetc (int c)
{
  line_number -= (c == '\n' ? 1 : 0);
  ungetc (c, fp);
}
/* A growable character buffer holding the text of a symbol or string.
   The contents are not NUL-terminated in general; charcount gives the
   length.  */
struct token
{
/* Number of chars for which 'chars' currently has room.  */
int allocated;
/* Number of chars stored so far.  */
int charcount;
/* Heap buffer, allocated by init_token and released by free_token.  */
char *chars;
};
/* Set up *TP as an empty token with a small initial buffer.  */
static inline void
init_token (struct token *tp)
{
  enum { INITIAL_CAPACITY = 10 };

  tp->charcount = 0;
  tp->allocated = INITIAL_CAPACITY;
  tp->chars = (char *) xmalloc (INITIAL_CAPACITY * sizeof (char));
}
/* Release the character buffer owned by *TP.  The struct itself is not
   freed; that is left to the caller.  */
static inline void
free_token (struct token *tp)
{
  free (tp->chars);
}
/* Make sure *TP has room for at least one more character, doubling the
   buffer when it is completely full.  */
static inline void
grow_token (struct token *tp)
{
  if (tp->charcount != tp->allocated)
    return;

  tp->allocated *= 2;
  tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
}
/* Tell whether the NUL-terminated string P has the syntax of an integer:
   an optional sign, a run of decimal digits, and an optional trailing '.'.
   The digit run may be empty as long as something precedes the '.', so
   "+." and "-." are accepted, while "", "+", "-" and "." are not.  */
static inline bool
is_integer (const char *p)
{
  const char *cursor = p;

  if (*cursor == '+' || *cursor == '-')
    cursor++;

  /* Nothing, or a sign alone, is not a number.  */
  if (*cursor == '\0')
    return false;

  while (*cursor >= '0' && *cursor <= '9')
    cursor++;

  /* A trailing dot is allowed only if at least one character (sign or
     digit) came before it.  */
  if (*cursor == '.' && cursor != p)
    cursor++;

  return *cursor == '\0';
}
/* Tell whether the NUL-terminated string P has the syntax of a floating
   point number.  The string is scanned as
     [sign] [digits] ['.'] [digits] [('e'|'E') [sign] (digits | INF | NaN)]
   where INF and NaN are only recognised after an explicit '+'.  Which parts
   were present is recorded as a bit set, and only certain combinations
   are accepted (for example "1." and "1.e5" are rejected).  */
static inline bool
is_float (const char *p)
{
  enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
  int seen = 0;

  if (*p == '+' || *p == '-')
    p++;

  /* Digits before the dot.  */
  if (*p >= '0' && *p <= '9')
    seen |= LEAD_INT;
  while (*p >= '0' && *p <= '9')
    p++;

  if (*p == '.')
    {
      seen |= DOT_CHAR;
      p++;
    }

  /* Digits after the dot.  */
  if (*p >= '0' && *p <= '9')
    seen |= TRAIL_INT;
  while (*p >= '0' && *p <= '9')
    p++;

  if (*p == 'e' || *p == 'E')
    {
      bool explicit_plus;

      seen |= E_CHAR;
      p++;

      explicit_plus = (*p == '+');
      if (*p == '+' || *p == '-')
        p++;

      if (*p >= '0' && *p <= '9')
        {
          seen |= EXP_INT;
          while (*p >= '0' && *p <= '9')
            p++;
        }
      else if (explicit_plus
               && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
                   || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
        {
          /* "e+INF" and "e+NaN" count as an exponent.  */
          seen |= EXP_INT;
          p += 3;
        }
    }

  /* Trailing garbage disqualifies the string.  */
  if (*p != '\0')
    return false;

  switch (seen)
    {
    case LEAD_INT | DOT_CHAR | TRAIL_INT:
    case DOT_CHAR | TRAIL_INT:
    case LEAD_INT | E_CHAR | EXP_INT:
    case LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT:
    case DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT:
      return true;
    default:
      return false;
    }
}
/* Read a token into *TP, which this function initialises.
   FIRST is the token's first character, already consumed by the caller.
   Characters are accumulated until end of input, a character <= ' ', or
   one of  " ' ; ( ) [ ] #  is met; that terminating character is pushed
   back.  A backslash takes the following character literally.
   Returns true if the token is a symbol, false if it is a number.  A token
   that contained a backslash is always a symbol; in that case no NUL
   terminator is stored.  */
static bool
read_token (struct token *tp, int first)
{
  bool saw_backslash = false;
  int ch;

  init_token (tp);

  for (ch = first; ch != EOF; ch = do_getc ())
    {
      bool ends_token;

      if (ch <= ' ')
        ends_token = true;
      else
        switch (ch)
          {
          case '\"': case '\'': case ';':
          case '(': case ')':
          case '[': case ']':
          case '#':
            ends_token = true;
            break;
          default:
            ends_token = false;
            break;
          }
      if (ends_token)
        break;

      if (ch == '\\')
        {
          saw_backslash = true;
          ch = do_getc ();
          /* A backslash right before end of input is tolerated.  */
          if (ch == EOF)
            break;
        }

      grow_token (tp);
      tp->chars[tp->charcount++] = ch;
    }

  /* Leave the terminating character for the caller.  */
  if (ch != EOF)
    do_ungetc (ch);

  if (saw_backslash)
    return true;

  /* NUL-terminate (without counting the NUL) so that the number syntax
     checks can treat the token as a C string.  */
  grow_token (tp);
  tp->chars[tp->charcount] = '\0';

  return !(is_integer (tp->chars) || is_float (tp->chars));
}
/* Accumulator for the text of the comment line currently being read.  */
static char *buffer;
/* Number of bytes for which 'buffer' currently has room.  */
static size_t bufmax;
/* Number of bytes of 'buffer' currently in use.  */
static size_t buflen;
/* Begin accumulating a new comment line: discard whatever the comment
   buffer held before (the storage itself is kept for reuse).  */
static inline void
comment_start ()
{
  buflen = 0;
}
/* Append the character C to the comment buffer, enlarging the buffer
   first if it is full.  */
static inline void
comment_add (int c)
{
  if (buflen >= bufmax)
    {
      bufmax = 2 * bufmax + 10;
      buffer = xrealloc (buffer, bufmax);
    }

  buffer[buflen] = c;
  buflen++;
}
/* Finish the current comment line: drop the last CHARS_TO_REMOVE
   characters, strip trailing spaces and tabs, NUL-terminate the buffer
   and hand the text to xgettext_comment_add.  */
static inline void
comment_line_end (size_t chars_to_remove)
{
  buflen -= chars_to_remove;

  for (; buflen > 0; buflen--)
    {
      char last = buffer[buflen - 1];

      if (last != ' ' && last != '\t')
        break;
    }

  /* Room for the terminator can only be missing when nothing was
     removed above and the buffer is completely full (or still
     unallocated).  */
  if (chars_to_remove == 0 && buflen >= bufmax)
    {
      bufmax = 2 * bufmax + 10;
      buffer = xrealloc (buffer, bufmax);
    }

  buffer[buflen] = '\0';
  xgettext_comment_add (buffer);
}
/* Line number of the most recently read ';' comment, or -1 if none.  */
static int last_comment_line;
/* Line number at which the most recent non-comment object ended, or -1.
   Compared with last_comment_line at each newline to decide whether the
   accumulated comments are reset.  */
static int last_non_comment_line;
/* Message list that extracted strings are added to.  */
static message_list_ty *mlp;
/* Classification of what read_object has just read.  */
enum object_type
{
/* A token that read_token classified as a symbol.  */
t_symbol,
/* A double-quoted string literal.  */
t_string,
/* Any other complete object (list, vector, number, character, ...).  */
t_other,
/* A lone '.' followed by a delimiter.  */
t_dot,
/* The ')' character.  */
t_listclose,
/* The ']' character.  */
t_vectorclose,
/* End of input.  */
t_eof
};
/* Result of read_object.  */
struct object
{
/* What kind of object was read.  */
enum object_type type;
/* Heap-allocated text; only set (and only freed by free_object) when
   type is t_symbol or t_string.  */
struct token *token;
/* Line on which the object started; read_object only sets this for
   t_string.  */
int line_number_at_start;
};
/* Release the memory attached to *OP.  Only symbols and strings carry
   a token; every other object type owns nothing.  */
static inline void
free_object (struct object *op)
{
  switch (op->type)
    {
    case t_symbol:
    case t_string:
      free_token (op->token);
      free (op->token);
      break;
    default:
      break;
    }
}
/* Return a freshly allocated, NUL-terminated copy of the text of *OP.
   OP must be a symbol or a string; anything else aborts the program.
   The caller owns the returned memory.  */
static char *
string_of_object (const struct object *op)
{
  char *copy;
  int len;

  if (op->type != t_symbol && op->type != t_string)
    abort ();

  len = op->token->charcount;
  copy = (char *) xmalloc (len + 1);
  memcpy (copy, op->token->chars, len);
  copy[len] = '\0';

  return copy;
}
/* Flag table consulted by read_object to find the flag contexts of the
   arguments of a form, keyed by the form's leading symbol.  Set by
   extract_elisp.  */
static flag_context_list_table_ty *flag_context_list_table;
/* Value returned by do_getc_escaped for an escape sequence that
   contributes no character.  Chosen distinct from EOF.  */
#define IGNORABLE_ESCAPE (EOF - 1)
/* Decode the escape sequence that follows a backslash.
   C is the character that was read right after the backslash; any further
   characters the escape needs are read with do_getc ().
   IN_STRING is true when the escape occurs inside a string literal; it
   only matters for backslash-space, which is ignorable inside strings and
   stands for a space elsewhere.  Nested escapes are always decoded with
   IN_STRING false.
   Returns the decoded character, EOF if the input ended inside the escape,
   or IGNORABLE_ESCAPE if the escape contributes no character.  */
static int
do_getc_escaped (int c, bool in_string)
{
  switch (c)
    {
    case 'a':
      return '\a';
    case 'b':
      return '\b';
    case 'd':
      return 0x7F;
    case 'e':
      return 0x1B;
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case 'v':
      return '\v';

    case '\n':
      /* Backslash-newline produces nothing.  */
      return IGNORABLE_ESCAPE;

    case ' ':
      return (in_string ? IGNORABLE_ESCAPE : ' ');

    case 'M':
      /* "M-" followed by a character or a nested escape: set bit 7.
         If no '-' follows, the character after 'M' is returned as is.  */
      c = do_getc ();
      if (c == EOF)
        return EOF;
      if (c != '-')
        return c;
      c = do_getc ();
      if (c == EOF)
        return EOF;
      if (c == '\\')
        {
          c = do_getc ();
          if (c == EOF)
            return EOF;
          c = do_getc_escaped (c, false);
        }
      return c | 0x80;

    case 'S':
      /* "S-" followed by a character or a nested escape: ASCII lower
         case letters are converted to upper case.  */
      c = do_getc ();
      if (c == EOF)
        return EOF;
      if (c != '-')
        return c;
      c = do_getc ();
      if (c == EOF)
        return EOF;
      if (c == '\\')
        {
          c = do_getc ();
          if (c == EOF)
            return EOF;
          c = do_getc_escaped (c, false);
        }
      return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);

    case 'H':
    case 'A':
    case 's':
      /* "H-", "A-", "s-" followed by a character or a nested escape:
         the prefix is dropped and the character returned unchanged.  */
      c = do_getc ();
      if (c == EOF)
        return EOF;
      if (c != '-')
        return c;
      c = do_getc ();
      if (c == EOF)
        return EOF;
      if (c == '\\')
        {
          c = do_getc ();
          if (c == EOF)
            return EOF;
          c = do_getc_escaped (c, false);
        }
      return c;

    case 'C':
      /* "C-" behaves exactly like "^" once the '-' has been consumed.  */
      c = do_getc ();
      if (c == EOF)
        return EOF;
      if (c != '-')
        return c;
      /* fallthrough */
    case '^':
      c = do_getc ();
      if (c == EOF)
        return EOF;
      if (c == '\\')
        {
          c = do_getc ();
          if (c == EOF)
            return EOF;
          c = do_getc_escaped (c, false);
        }
      if (c == '?')
        return 0x7F;
      /* Letters (either case) and the range 0x40..0x5F map to control
         characters by clearing bits 5 and 6.  */
      if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
        return c & 0x9F;
      if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
        return c & 0x9F;
#if 0
      if (c == ' ')
        return 0x00;
#endif
      return c;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      /* Up to three octal digits; the first non-octal character is
         pushed back.  Only the low 8 bits of the value are returned.  */
      {
        int n = c - '0';

        c = do_getc ();
        if (c != EOF)
          {
            if (c >= '0' && c <= '7')
              {
                n = (n << 3) + (c - '0');
                c = do_getc ();
                if (c != EOF)
                  {
                    if (c >= '0' && c <= '7')
                      n = (n << 3) + (c - '0');
                    else
                      do_ungetc (c);
                  }
              }
            else
              do_ungetc (c);
          }
        return (unsigned char) n;
      }

    case 'x':
      /* Any number of hexadecimal digits; the first non-hex character is
         pushed back.  Only the low 8 bits of the value are returned.
         The accumulator is unsigned because the digit run is unbounded:
         shifting a signed int past its range would be undefined
         behaviour, whereas unsigned arithmetic simply wraps and leaves
         the low 8 bits intact.  */
      {
        unsigned int n = 0;

        for (;;)
          {
            c = do_getc ();
            if (c == EOF)
              break;
            else if (c >= '0' && c <= '9')
              n = (n << 4) + (unsigned int) (c - '0');
            else if (c >= 'A' && c <= 'F')
              n = (n << 4) + (unsigned int) (c - 'A' + 10);
            else if (c >= 'a' && c <= 'f')
              n = (n << 4) + (unsigned int) (c - 'a' + 10);
            else
              {
                do_ungetc (c);
                break;
              }
          }
        return (unsigned char) n;
      }

    default:
      /* Any other character stands for itself.  */
      return c;
    }
}
/* Read the next Lisp object from the input and describe it in *OP.
   On return OP->type is always set.  OP->token is only set for t_symbol
   and t_string, and OP->line_number_at_start only for t_string.
   Newlines, comments and other characters <= ' ' before the object are
   skipped.
   FIRST_IN_LIST is true when the object is the first element of a list;
   it is consulted for '`' and passed on when reading "#&".
   NEW_BACKQUOTE_FLAG is true while reading the object that follows a
   backquote; ',' is only treated specially when it is set.
   OUTER_CONTEXT is the flag context that list arguments inherit from.
   Strings found at keyword argument positions (and every string if
   extract_all is set) are recorded in the message list 'mlp'.  */
static void
read_object (struct object *op, bool first_in_list, bool new_backquote_flag,
flag_context_ty outer_context)
{
for (;;)
{
int c;
c = do_getc ();
switch (c)
{
case EOF:
op->type = t_eof;
return;
case '\n':
/* A newline after code (rather than after a comment) makes the
   comments accumulated so far stale.  */
if (last_non_comment_line > last_comment_line)
xgettext_comment_reset ();
continue;
case '(':
/* A list.  Elements are read recursively; 'arg' is the index of the
   element being read, element 0 being the head of the form.  */
{
int arg = 0;
flag_context_list_iterator_ty context_iter;
/* Argument positions of the keyword at the head of the list;
   they stay 0 when the head is not a registered keyword.  */
int argnum1 = 0;
int argnum2 = 0;
/* Message recorded for argnum1 when a plural form is expected.  */
message_ty *plural_mp = NULL;
for (;; arg++)
{
struct object inner;
flag_context_ty inner_context;
if (arg == 0)
inner_context = null_context;
else
inner_context =
inherited_context (outer_context,
flag_context_list_iterator_advance (
&context_iter));
read_object (&inner, arg == 0, new_backquote_flag,
inner_context);
if (inner.type == t_listclose)
{
op->type = t_other;
last_non_comment_line = line_number;
return;
}
/* End of input inside a list: leave the loop and report the
   unterminated list as t_other.  */
if (inner.type == t_eof)
break;
if (arg == 0)
{
if (inner.type == t_symbol)
{
char *symbol_name = string_of_object (&inner);
void *keyword_value;
/* find_entry returning 0 is treated as "found"; the value is
   the packing argnum1 + (argnum2 << 10) stored by
   x_elisp_keyword.  */
if (find_entry (&keywords,
symbol_name, strlen (symbol_name),
&keyword_value)
== 0)
{
argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
argnum2 = (int) (long) keyword_value >> 10;
}
context_iter =
flag_context_list_iterator (
flag_context_list_table_lookup (
flag_context_list_table,
symbol_name, strlen (symbol_name)));
free (symbol_name);
}
else
context_iter = null_context_list_iterator;
}
else
{
if (arg == argnum1)
{
if (inner.type == t_string)
{
lex_pos_ty pos;
message_ty *mp;
pos.file_name = logical_file_name;
pos.line_number = inner.line_number_at_start;
/* The copy made by string_of_object is handed over to
   remember_a_message and not freed here.  */
mp = remember_a_message (mlp, string_of_object (&inner),
inner_context, &pos);
if (argnum2 > 0)
plural_mp = mp;
}
}
else if (arg == argnum2)
{
if (inner.type == t_string && plural_mp != NULL)
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = inner.line_number_at_start;
remember_a_message_plural (plural_mp, string_of_object (&inner),
inner_context, &pos);
}
}
}
free_object (&inner);
}
}
op->type = t_other;
last_non_comment_line = line_number;
return;
case ')':
op->type = t_listclose;
last_non_comment_line = line_number;
return;
case '[':
/* A vector: read and discard elements up to the matching ']'
   (or end of input).  */
{
for (;;)
{
struct object inner;
read_object (&inner, false, new_backquote_flag, null_context);
if (inner.type == t_vectorclose)
{
op->type = t_other;
last_non_comment_line = line_number;
return;
}
if (inner.type == t_eof)
break;
free_object (&inner);
}
}
op->type = t_other;
last_non_comment_line = line_number;
return;
case ']':
op->type = t_vectorclose;
last_non_comment_line = line_number;
return;
case '\'':
/* Quote: the following object is read and discarded.  */
{
struct object inner;
read_object (&inner, false, new_backquote_flag, null_context);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '`':
/* At the head of a list a backquote is handled like the start of an
   ordinary token; elsewhere the following object is read with
   new_backquote_flag set.  */
if (first_in_list)
goto default_label;
{
struct object inner;
read_object (&inner, false, true, null_context);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case ',':
/* A comma is only special inside a backquoted object.  */
if (!new_backquote_flag)
goto default_label;
{
/* Swallow an optional '@' or '.' right after the comma.  */
int c = do_getc ();
if (c != EOF && c != '@' && c != '.')
do_ungetc (c);
}
{
struct object inner;
read_object (&inner, false, false, null_context);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case ';':
/* A comment running to the end of the line.  The leading run of
   semicolons and any blanks immediately after it are not stored.  */
{
bool all_semicolons = true;
last_comment_line = line_number;
comment_start ();
for (;;)
{
int c = do_getc ();
if (c == EOF || c == '\n')
break;
if (c != ';')
all_semicolons = false;
if (!all_semicolons)
{
if (!(buflen == 0 && (c == ' ' || c == '\t')))
comment_add (c);
}
}
comment_line_end (0);
continue;
}
case '"':
/* A string literal.  Escapes are decoded by do_getc_escaped; end of
   input before the closing quote just ends the string.  */
{
op->token = (struct token *) xmalloc (sizeof (struct token));
init_token (op->token);
op->line_number_at_start = line_number;
for (;;)
{
int c = do_getc ();
if (c == EOF)
break;
if (c == '"')
break;
if (c == '\\')
{
c = do_getc ();
if (c == EOF)
break;
c = do_getc_escaped (c, true);
if (c == EOF)
break;
/* Escapes that produce no character are dropped.  */
if (c == IGNORABLE_ESCAPE)
;
else
{
grow_token (op->token);
op->token->chars[op->token->charcount++] = c;
}
}
else
{
grow_token (op->token);
op->token->chars[op->token->charcount++] = c;
}
}
op->type = t_string;
if (extract_all)
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = op->line_number_at_start;
remember_a_message (mlp, string_of_object (op),
null_context, &pos);
}
last_non_comment_line = line_number;
return;
}
case '?':
/* A character literal: consume the character, decoding an escape if
   there is one.  The value itself is not kept.  */
c = do_getc ();
if (c == EOF)
;
else if (c == '\\')
{
c = do_getc ();
if (c == EOF)
;
else
{
c = do_getc_escaped (c, false);
if (c == EOF)
;
}
}
op->type = t_other;
last_non_comment_line = line_number;
return;
case '#':
/* Hash syntax: the next character selects the form.  Every branch of
   the inner switch returns or continues the outer loop.  */
c = do_getc ();
if (c == EOF)
{
op->type = t_other;
return;
}
switch (c)
{
case '^':
/* "#^[" or "#^^[": read and discard elements up to ']'.  */
c = do_getc ();
if (c == '^')
c = do_getc ();
if (c == '[')
{
for (;;)
{
struct object inner;
read_object (&inner, false, new_backquote_flag,
null_context);
if (inner.type == t_vectorclose)
{
op->type = t_other;
last_non_comment_line = line_number;
return;
}
if (inner.type == t_eof)
break;
free_object (&inner);
}
op->type = t_other;
last_non_comment_line = line_number;
return;
}
else
{
op->type = t_other;
if (c != EOF)
last_non_comment_line = line_number;
return;
}
case '&':
/* "#&": one object (named 'length' here) is read and discarded; if a
   '"' follows, it is consumed and one more object is read and
   discarded.  */
{
struct object length;
read_object (&length, first_in_list, new_backquote_flag,
null_context);
free_object (&length);
}
c = do_getc ();
if (c == '"')
{
struct object string;
read_object (&string, first_in_list, new_backquote_flag,
null_context);
free_object (&string);
}
else
do_ungetc (c);
op->type = t_other;
last_non_comment_line = line_number;
return;
case '[':
case '(':
/* "#[" and "#(": push the bracket back and read the vector or list
   normally, discarding the result.  */
{
struct object inner;
do_ungetc (c);
read_object (&inner, false, new_backquote_flag, null_context);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '@':
/* "#@" followed by a decimal count: skip that many characters after
   the count, then go on looking for an object.  Nothing is skipped if
   the input ends right after the digits.  */
{
unsigned int nskip = 0;
for (;;)
{
c = do_getc ();
if (!(c >= '0' && c <= '9'))
break;
nskip = 10 * nskip + (c - '0');
}
if (c != EOF)
{
do_ungetc (c);
for (; nskip > 0; nskip--)
if (do_getc () == EOF)
break;
}
continue;
}
case '$':
op->type = t_other;
last_non_comment_line = line_number;
return;
case '\'':
case ':':
case 'S': case 's':
/* "#'", "#:", "#S", "#s": the following object is read and
   discarded.  */
{
struct object inner;
read_object (&inner, false, new_backquote_flag, null_context);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
/* "#" followed by digits: the character after the digit run decides
   what this is.  */
for (;;)
{
c = do_getc ();
if (!(c >= '0' && c <= '9'))
break;
}
if (c == EOF)
{
op->type = t_other;
return;
}
if (c == '=')
{
/* "#N=": the object that follows is the result; it is read
   straight into *op with the caller's context.  */
read_object (op, false, new_backquote_flag, outer_context);
last_non_comment_line = line_number;
return;
}
if (c == '#')
{
/* "#N#".  */
op->type = t_other;
last_non_comment_line = line_number;
return;
}
if (c == 'R' || c == 'r')
{
/* "#NR": an optional sign followed by a run of alphanumerics; the
   first other character is pushed back.  */
c = do_getc ();
if (c == '+' || c == '-')
c = do_getc ();
for (; c != EOF; c = do_getc ())
if (!c_isalnum (c))
{
do_ungetc (c);
break;
}
op->type = t_other;
last_non_comment_line = line_number;
return;
}
op->type = t_other;
last_non_comment_line = line_number;
return;
case 'X': case 'x':
case 'O': case 'o':
case 'B': case 'b':
/* "#x", "#o", "#b": an optional sign followed by a run of
   alphanumerics; the first other character is pushed back.  */
{
c = do_getc ();
if (c == '+' || c == '-')
c = do_getc ();
for (; c != EOF; c = do_getc ())
if (!c_isalnum (c))
{
do_ungetc (c);
break;
}
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '*':
/* "#*": a run of '0' and '1' characters.  */
{
do
c = do_getc ();
while (c == '0' || c == '1');
if (c != EOF)
do_ungetc (c);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '+':
case '-':
/* "#+" and "#-": the following object is read and discarded, then
   reading continues with whatever comes after it.  */
{
struct object inner;
read_object (&inner, false, new_backquote_flag, null_context);
free_object (&inner);
continue;
}
default:
op->type = t_other;
last_non_comment_line = line_number;
return;
}
/* Not reached: every case above returns or continues.  */
abort ();
case '.':
/* A dot followed by a character <= ' ' or one of  " ' ` , (  is a
   t_dot object.  Otherwise, including at end of input, the dot
   starts an ordinary token.  */
c = do_getc ();
if (c != EOF)
{
do_ungetc (c);
if (c <= ' '
|| strchr ("\"'`,(", c) != NULL)
{
op->type = t_dot;
last_non_comment_line = line_number;
return;
}
}
c = '.';
default:
default_label:
/* Characters <= ' ' between objects are skipped.  */
if (c <= ' ')
continue;
/* Anything else starts a token, which read_token classifies as a
   symbol (token kept in *op) or a number (token discarded).  */
{
bool symbol;
op->token = (struct token *) xmalloc (sizeof (struct token));
symbol = read_token (op->token, c);
if (symbol)
{
op->type = t_symbol;
last_non_comment_line = line_number;
return;
}
else
{
free_token (op->token);
free (op->token);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
}
}
}
}
/* Scan the Emacs Lisp source on stream F and add the extracted messages
   to the first message list of MDLP.
   REAL_FILENAME is used in read-error messages.  A copy of
   LOGICAL_FILENAME is stored in the position of each recorded message.
   FLAG_TABLE supplies the per-function argument flag contexts.  */
void
extract_elisp (FILE *f,
               const char *real_filename, const char *logical_filename,
               flag_context_list_table_ty *flag_table,
               msgdomain_list_ty *mdlp)
{
  /* Set up the file-level reader state.  */
  fp = f;
  real_file_name = real_filename;
  logical_file_name = xstrdup (logical_filename);
  line_number = 1;
  last_comment_line = -1;
  last_non_comment_line = -1;
  flag_context_list_table = flag_table;
  mlp = mdlp->item[0]->messages;

  init_keywords ();

  /* Read top-level forms until the input is exhausted.  The extraction
     itself happens as a side effect of read_object.  */
  for (;;)
    {
      struct object form;

      read_object (&form, false, false, null_context);
      if (form.type == t_eof)
        break;
      free_object (&form);
      if (feof (fp))
        break;
    }

  /* Drop the references to the caller's data.  The copy of the logical
     file name is deliberately not freed here.
     NOTE(review): presumably the recorded message positions still point
     at it -- confirm before adding a free ().  */
  fp = NULL;
  real_file_name = NULL;
  logical_file_name = NULL;
  line_number = 0;
}