#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "message.h"
#include "xgettext.h"
#include "x-lisp.h"
#include "error.h"
#include "xalloc.h"
#include "exit.h"
#include "hash.h"
#include "gettext.h"
#define _(s) gettext(s)
/* The ways in which the reader may fold the case of the unescaped
   characters of a symbol token.  The value in effect is consulted by
   case_convert_token.  */
enum rtcase
{
/* Convert unescaped lowercase letters to uppercase.  */
case_upcase,
/* Convert unescaped uppercase letters to lowercase.  */
case_downcase,
/* Leave the token unchanged.  */
case_preserve,
/* Swap the case, but only for tokens whose unescaped letters all have
   the same case.  */
case_invert
};
/* Case conversion applied by case_convert_token to symbol tokens.  */
static enum rtcase readtable_case = case_upcase;
/* Radix handed to is_number when read_object classifies a token.  */
static int read_base = 10;
/* When true, read_token pushes the whitespace character that terminated
   a token back onto the input instead of consuming it.  */
static bool read_preserve_whitespace = true;
/* When true, read_object records every string literal as a message.
   Set by x_lisp_extract_all.  */
static bool extract_all = false;
/* Maps (upcased) keyword symbol names to their packed argument numbers;
   filled by x_lisp_keyword and queried by read_object.  */
static hash_table keywords;
/* True as long as init_keywords should still install the built-in
   keywords.  Cleared by x_lisp_keyword (NULL) and by init_keywords.  */
static bool default_keywords = true;
/* Turn on "extract all" mode.  Once the flag is set, read_object records
   every string literal it reads as a message, independently of the
   keyword table.  */
void
x_lisp_extract_all ()
{
  extract_all = true;
}
/* Register the keyword specification NAME, or, when NAME is NULL, disable
   the built-in default keywords.

   The specification is split by split_keywordspec into a symbol part
   (ending at END) and up to two argument numbers.  A package prefix
   ("pkg:" or "pkg::") in front of the symbol is dropped; a specification
   that still contains a colon afterwards is silently ignored.  The symbol
   is stored upcased, and the argument numbers are packed into the hash
   value as argnum1 + (argnum2 << 10), with argnum1 defaulting to 1.  */
void
x_lisp_keyword (const char *name)
{
  const char *end;
  const char *sym;
  const char *colon;
  int argnum1;
  int argnum2;
  size_t len;
  size_t k;
  char *symname;

  if (name == NULL)
    {
      default_keywords = false;
      return;
    }

  /* Create the table lazily on first use.  */
  if (keywords.table == NULL)
    init_hash (&keywords, 100);

  split_keywordspec (name, &end, &argnum1, &argnum2);

  /* Skip over a package prefix, if there is one inside [name, end).  */
  sym = name;
  colon = strchr (sym, ':');
  if (colon != NULL && colon < end)
    {
      sym = colon + 1;
      if (sym < end && *sym == ':')
        sym++;
      /* A further colon means this is not a plain symbol: give up.  */
      colon = strchr (sym, ':');
      if (colon != NULL && colon < end)
        return;
    }

  /* Build the upcased key.  It is not NUL-terminated; its length is
     passed to the hash table explicitly.  */
  len = end - sym;
  symname = (char *) xmalloc (len);
  for (k = 0; k < len; k++)
    {
      char ch = sym[k];

      if (ch >= 'a' && ch <= 'z')
        ch = ch - 'a' + 'A';
      symname[k] = ch;
    }

  if (argnum1 == 0)
    argnum1 = 1;
  insert_entry (&keywords, symname, len,
                (void *) (long) (argnum1 + (argnum2 << 10)));
}
/* Install the built-in keywords, unless that has been done already or the
   defaults were disabled through x_lisp_keyword (NULL).  */
static void
init_keywords ()
{
  if (!default_keywords)
    return;

  x_lisp_keyword ("gettext");
  x_lisp_keyword ("ngettext:1,2");
  x_lisp_keyword ("gettext-noop");

  /* Make sure the defaults are installed only once.  */
  default_keywords = false;
}
/* Register the format-string flags of the functions known to this
   backend by handing each specification to xgettext_record_flag.  */
void
init_flag_table_lisp ()
{
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-lisp-format",
      "ngettext:1:pass-lisp-format",
      "ngettext:2:pass-lisp-format",
      "gettext-noop:1:pass-lisp-format",
      "format:2:lisp-format"
    };
  size_t k;

  for (k = 0; k < sizeof flag_specs / sizeof flag_specs[0]; k++)
    xgettext_record_flag (flag_specs[k]);
}
/* Name of the file being read, as used in the read error message.  */
static const char *real_file_name;
/* File name stored in the positions of extracted messages.  */
static char *logical_file_name;
/* Current line number; maintained by do_getc and do_ungetc.  */
static int line_number;
/* The input stream being scanned.  */
static FILE *fp;
static int
do_getc ()
{
int c = getc (fp);
if (c == EOF)
{
if (ferror (fp))
error (EXIT_FAILURE, errno, _("\
error while reading \"%s\""), real_file_name);
}
else if (c == '\n')
line_number++;
return c;
}
/* Push the character C back onto the input stream, undoing the line
   count increment that do_getc made if C is a newline.  */
static void
do_ungetc (int c)
{
  line_number -= (c == '\n');
  ungetc (c, fp);
}
/* Syntax classes a character can belong to, as far as the token reader
   is concerned.  */
enum syntax_code
{
  syntax_illegal,       /* non-printable, except backspace */
  syntax_single_esc,    /* '\' */
  syntax_multi_esc,     /* '|' */
  syntax_constituent,   /* everything else */
  syntax_whitespace,    /* TAB, LF, FF, CR, ' ' */
  syntax_eof,           /* end of input; never returned by syntax_code_of */
  syntax_t_macro,       /* '()'"' ',;`' -- these end a token */
  syntax_nt_macro       /* '#' -- does not end a token */
};

/* Return the syntax class of the character C.  */
static enum syntax_code
syntax_code_of (unsigned char c)
{
  if (c == '\\')
    return syntax_single_esc;
  if (c == '|')
    return syntax_multi_esc;
  if (c == '#')
    return syntax_nt_macro;
  if (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r')
    return syntax_whitespace;
  if (c == '(' || c == ')' || c == '\'' || c == '"'
      || c == ',' || c == ';' || c == '`')
    return syntax_t_macro;

  /* Remaining control characters are illegal, backspace excepted.  */
  return (c < ' ' && c != '\b') ? syntax_illegal : syntax_constituent;
}
/* A character read from the input together with its syntax class, as
   filled in by read_char_syntax.  */
struct char_syntax
{
/* The character, or EOF.  */
int ch;
/* Its syntax class; syntax_eof when ch is EOF.  */
enum syntax_code scode;
};
/* Read the next input character into P->ch and store its syntax class in
   P->scode.  End of input is reported as syntax_eof.  */
static void
read_char_syntax (struct char_syntax *p)
{
  p->ch = do_getc ();
  if (p->ch == EOF)
    p->scode = syntax_eof;
  else
    p->scode = syntax_code_of (p->ch);
}
/* Attributes a character of a token can carry.  The order matters: the
   range tests below and a_letter_to_digit (which subtracts 2 to turn
   a_letter into a_letterdigit and a_expo into a_expodigit) depend on it.  */
enum attribute
{
  a_illg,         /* invalid constituent */
  a_pack_m,       /* ':' package marker */
  a_alpha,        /* any other constituent */
  a_escaped,      /* character that was escaped in the input */
  a_ratio,        /* '/' */
  a_dot,          /* '.' */
  a_sign,         /* '+' '-' */
  a_extens,       /* '_' '^' */
  a_digit,        /* '0'..'9' */
  a_letterdigit,  /* letter acting as a digit in the current base */
  a_expodigit,    /* exponent marker acting as a digit in the current base */
  a_letter,       /* letter other than an exponent marker */
  a_expo          /* exponent marker: e s d f l (either case) */
};

/* True for a_letter and a_expo.  */
#define is_letter_attribute(a) ((a) >= a_letter)
/* True for every attribute that may occur in a potential number.  */
#define is_number_attribute(a) ((a) >= a_ratio)

/* Return the attribute of the unescaped constituent character C.  */
static enum attribute
attribute_of (unsigned char c)
{
  static const char exponent_markers[] = "esdflESDFL";
  static const char plain_letters[] =
    "abcghijkmnopqrtuvwxyz" "ABCGHIJKMNOPQRTUVWXYZ";

  if (c >= '0' && c <= '9')
    return a_digit;

  /* The lengths exclude the terminating NUL, so that C == 0 never
     matches.  */
  if (memchr (exponent_markers, c, sizeof exponent_markers - 1) != NULL)
    return a_expo;
  if (memchr (plain_letters, c, sizeof plain_letters - 1) != NULL)
    return a_letter;

  if (c == ':')
    return a_pack_m;
  if (c == '/')
    return a_ratio;
  if (c == '.')
    return a_dot;
  if (c == '+' || c == '-')
    return a_sign;
  if (c == '_' || c == '^')
    return a_extens;

  return a_alpha;
}
/* One character of a token.  */
struct token_char
{
/* The character itself.  */
unsigned char ch;
/* Its attribute; holds a value of 'enum attribute'.  */
unsigned char attribute;
};
/* A token: a growable array of characters with their attributes.  */
struct token
{
/* Number of elements allocated for chars.  */
int allocated;
/* Number of elements of chars in use.  */
int charcount;
/* The characters; heap-allocated, released by free_token.  */
struct token_char *chars;
/* Set by read_token to whether the token contained an escape character
   ('\' or '|').  */
bool with_escape;
};
/* Make TP an empty token with room for 10 characters.  The with_escape
   field is not initialised here.  */
static inline void
init_token (struct token *tp)
{
  tp->charcount = 0;
  tp->allocated = 10;
  tp->chars =
    (struct token_char *) xmalloc (tp->allocated * sizeof (struct token_char));
}
/* Release the character storage of TP.  The struct token itself is not
   freed.  */
static inline void
free_token (struct token *tp)
{
  struct token_char *storage = tp->chars;

  free (storage);
}
/* Ensure that TP has room for at least one more character, doubling the
   allocation when it is full.  */
static inline void
grow_token (struct token *tp)
{
  if (tp->charcount != tp->allocated)
    return;

  tp->allocated *= 2;
  tp->chars =
    (struct token_char *) xrealloc (tp->chars,
                                    tp->allocated * sizeof (struct token_char));
}
static void
read_token (struct token *tp, const struct char_syntax *first)
{
bool multiple_escape_flag;
struct char_syntax curr;
init_token (tp);
tp->with_escape = false;
multiple_escape_flag = false;
if (first)
curr = *first;
else
read_char_syntax (&curr);
for (;; read_char_syntax (&curr))
{
switch (curr.scode)
{
case syntax_illegal:
do_ungetc (curr.ch);
return;
case syntax_single_esc:
tp->with_escape = true;
read_char_syntax (&curr);
if (curr.scode == syntax_eof)
return;
grow_token (tp);
tp->chars[tp->charcount].ch = curr.ch;
tp->chars[tp->charcount].attribute = a_escaped;
tp->charcount++;
break;
case syntax_multi_esc:
multiple_escape_flag = !multiple_escape_flag;
tp->with_escape = true;
break;
case syntax_constituent:
case syntax_nt_macro:
grow_token (tp);
if (multiple_escape_flag)
{
tp->chars[tp->charcount].ch = curr.ch;
tp->chars[tp->charcount].attribute = a_escaped;
tp->charcount++;
}
else
{
tp->chars[tp->charcount].ch = curr.ch;
tp->chars[tp->charcount].attribute = attribute_of (curr.ch);
tp->charcount++;
}
break;
case syntax_whitespace:
case syntax_t_macro:
if (multiple_escape_flag)
{
grow_token (tp);
tp->chars[tp->charcount].ch = curr.ch;
tp->chars[tp->charcount].attribute = a_escaped;
tp->charcount++;
}
else
{
if (curr.scode != syntax_whitespace || read_preserve_whitespace)
do_ungetc (curr.ch);
return;
}
break;
case syntax_eof:
if (multiple_escape_flag)
;
return;
}
}
}
/* Tell whether the token TP contains a character with attribute a_dot.  */
static inline bool
has_a_dot (const struct token *tp)
{
  int k = 0;

  while (k < tp->charcount)
    {
      if (tp->chars[k].attribute == a_dot)
        return true;
      k++;
    }
  return false;
}
/* Tell whether every character of the token TP has an attribute that is
   allowed in a potential number.  An empty token satisfies this.  */
static inline bool
all_a_number (const struct token *tp)
{
  int k = 0;

  while (k < tp->charcount
         && is_number_attribute (tp->chars[k].attribute))
    k++;

  /* We got to the end only if no character failed the test.  */
  return k >= tp->charcount;
}
static inline void
a_letter_to_digit (const struct token *tp, int base)
{
int n = tp->charcount;
int i;
for (i = 0; i < n; i++)
if (is_letter_attribute (tp->chars[i].attribute))
{
int c = tp->chars[i].ch;
if (c >= 'a')
c -= 'a' - 'A';
if (c - 'A' + 10 < base)
tp->chars[i].attribute -= 2;
}
}
/* Tell whether the token TP contains at least one digit, counting letters
   that have been reclassified as digits.  */
static inline bool
has_a_digit (const struct token *tp)
{
  int k;

  for (k = 0; k < tp->charcount; k++)
    switch (tp->chars[k].attribute)
      {
      case a_digit:
      case a_letterdigit:
      case a_expodigit:
        return true;
      default:
        break;
      }
  return false;
}
/* Tell whether the token TP contains two consecutive characters that both
   have a letter attribute (a_letter or a_expo).  */
static inline bool
has_adjacent_letters (const struct token *tp)
{
  bool previous_was_letter = false;
  int k;

  for (k = 0; k < tp->charcount; k++)
    {
      bool this_is_letter = is_letter_attribute (tp->chars[k].attribute);

      if (this_is_letter && previous_was_letter)
        return true;
      previous_was_letter = this_is_letter;
    }
  return false;
}
static bool
is_potential_number (const struct token *tp, int *basep)
{
if (tp->with_escape)
return false;
if (has_a_dot (tp))
*basep = 10;
if (!all_a_number (tp))
return false;
a_letter_to_digit (tp, *basep);
if (!has_a_digit (tp))
return false;
if (has_adjacent_letters (tp))
return false;
if (!(tp->chars[0].attribute >= a_dot
&& tp->chars[0].attribute <= a_expodigit))
return false;
if (tp->chars[tp->charcount - 1].attribute == a_sign)
return false;
return true;
}
/* Result of is_number: the kind of number a token denotes.  */
enum number_type
{
/* Not a number.  */
n_none,
/* Integer, possibly written with a trailing decimal point.  */
n_integer,
/* Ratio: two digit sequences separated by '/'.  */
n_ratio,
/* Floating-point number.  */
n_float
};
static enum number_type
is_number (const struct token *tp, int *basep)
{
struct token_char *ptr_limit;
struct token_char *ptr1;
if (!is_potential_number (tp, basep))
return n_none;
ptr1 = &tp->chars[0];
ptr_limit = &tp->chars[tp->charcount];
if (ptr1->attribute == a_sign)
ptr1++;
{
bool seen_a_ratio = false;
bool seen_a_digit = false;
struct token_char *ptr;
for (ptr = ptr1;; ptr++)
{
if (ptr >= ptr_limit)
{
if (!seen_a_digit)
break;
if (seen_a_ratio)
return n_ratio;
else
return n_integer;
}
if (ptr->attribute == a_digit
|| ptr->attribute == a_letterdigit
|| ptr->attribute == a_expodigit)
{
int c = ptr->ch;
c = (c < 'A' ? c - '0' : c < 'a' ? c - 'A' + 10 : c - 'a' + 10);
if (c >= *basep)
break;
seen_a_digit = true;
}
else if (ptr->attribute == a_ratio)
{
if (seen_a_ratio || !seen_a_digit)
break;
seen_a_ratio = true;
seen_a_digit = false;
}
else
break;
}
}
*basep = 10;
{
bool seen_a_dot = false;
bool seen_a_dot_with_leading_digits = false;
bool seen_a_digit = false;
struct token_char *ptr;
for (ptr = ptr1;; ptr++)
{
if (ptr >= ptr_limit)
{
if (!seen_a_dot)
return n_none;
if (seen_a_digit)
return n_float;
if (seen_a_dot_with_leading_digits)
return n_integer;
else
return n_none;
}
if (ptr->attribute == a_digit)
{
seen_a_digit = true;
}
else if (ptr->attribute == a_dot)
{
if (seen_a_dot)
return n_none;
seen_a_dot = true;
if (seen_a_digit)
seen_a_dot_with_leading_digits = true;
seen_a_digit = false;
}
else if (ptr->attribute == a_expo || ptr->attribute == a_expodigit)
break;
else
return n_none;
}
ptr++;
if (!seen_a_dot_with_leading_digits || !seen_a_digit)
return n_none;
if (ptr >= ptr_limit)
return n_none;
if (ptr->attribute == a_sign)
ptr++;
seen_a_digit = false;
for (;; ptr++)
{
if (ptr >= ptr_limit)
break;
if (ptr->attribute != a_digit)
return n_none;
seen_a_digit = true;
}
if (!seen_a_digit)
return n_none;
return n_float;
}
}
static void
upcase_token (struct token *tp)
{
int n = tp->charcount;
int i;
for (i = 0; i < n; i++)
if (tp->chars[i].attribute != a_escaped)
{
unsigned char c = tp->chars[i].ch;
if (c >= 'a' && c <= 'z')
tp->chars[i].ch = c - 'a' + 'A';
}
}
static void
downcase_token (struct token *tp)
{
int n = tp->charcount;
int i;
for (i = 0; i < n; i++)
if (tp->chars[i].attribute != a_escaped)
{
unsigned char c = tp->chars[i].ch;
if (c >= 'A' && c <= 'Z')
tp->chars[i].ch = c - 'A' + 'a';
}
}
/* Apply the case conversion selected by readtable_case to the token TP.
   Only unescaped characters take part.

   In case_invert mode the case is swapped only when all unescaped letters
   have the same case; a mixed-case token, or one without letters, is left
   as it is.  */
static void
case_convert_token (struct token *tp)
{
  bool any_upper = false;
  bool any_lower = false;
  int k;

  if (readtable_case == case_upcase)
    {
      upcase_token (tp);
      return;
    }
  if (readtable_case == case_downcase)
    {
      downcase_token (tp);
      return;
    }
  if (readtable_case != case_invert)
    /* case_preserve: nothing to do.  */
    return;

  /* Find out which cases occur among the unescaped characters.  */
  for (k = 0; k < tp->charcount; k++)
    {
      unsigned char c;

      if (tp->chars[k].attribute == a_escaped)
        continue;
      c = tp->chars[k].ch;
      if (c >= 'a' && c <= 'z')
        any_lower = true;
      if (c >= 'A' && c <= 'Z')
        any_upper = true;
    }

  if (any_upper && !any_lower)
    downcase_token (tp);
  else if (!any_upper && any_lower)
    upcase_token (tp);
}
/* Accumulator for the text of the comment line currently being read.  */
static char *buffer;
/* Number of bytes allocated for buffer.  */
static size_t bufmax;
/* Number of bytes of buffer currently in use.  */
static size_t buflen;
/* Begin accumulating a new comment line: discard the buffer contents
   while keeping the allocation.  */
static inline void
comment_start ()
{
  buflen = 0;
}
/* Append the character C to the comment buffer, enlarging the buffer
   first if it is full.  */
static inline void
comment_add (int c)
{
  if (buflen >= bufmax)
    {
      bufmax = 2 * bufmax + 10;
      buffer = xrealloc (buffer, bufmax);
    }
  buffer[buflen] = c;
  buflen++;
}
/* Finish the comment line in the buffer and pass it to
   xgettext_comment_add.  The last CHARS_TO_REMOVE characters are dropped
   first, then trailing spaces and tabs are stripped, and the result is
   NUL-terminated.  */
static inline void
comment_line_end (size_t chars_to_remove)
{
  size_t end = buflen - chars_to_remove;

  while (end > 0)
    {
      char last = buffer[end - 1];

      if (last != ' ' && last != '\t')
        break;
      end--;
    }
  buflen = end;

  /* Room for the terminating NUL can only be missing when nothing was
     removed.  */
  if (chars_to_remove == 0 && buflen >= bufmax)
    {
      bufmax = 2 * bufmax + 10;
      buffer = xrealloc (buffer, bufmax);
    }
  buffer[buflen] = '\0';
  xgettext_comment_add (buffer);
}
/* Line number at which the most recent comment was seen.  */
static int last_comment_line;
/* Line number at which the most recent non-comment object ended.  */
static int last_non_comment_line;
/* The message list that extracted messages are added to.  */
static message_list_ty *mlp;
/* Kinds of objects that read_object distinguishes.  */
enum object_type
{
/* A symbol; the object's token holds its (case-converted) name.  */
t_symbol,
/* A string literal; the object's token holds its contents.  */
t_string,
/* Anything else: numbers, lists, quoted forms, and so on.  */
t_other,
/* A lone '.' token.  */
t_dot,
/* A closing parenthesis.  */
t_close,
/* End of input.  */
t_eof
};
/* An object as returned by read_object.  */
struct object
{
/* What kind of object this is.  */
enum object_type type;
/* Heap-allocated token; owned by the object only when type is t_symbol
   or t_string (see free_object).  */
struct token *token;
/* Line on which the object started; set by read_object for string
   literals only.  */
int line_number_at_start;
};
/* Release the memory held by the object OP.  Only symbols and strings own
   a token; for all other types there is nothing to free.  */
static inline void
free_object (struct object *op)
{
  switch (op->type)
    {
    case t_symbol:
    case t_string:
      free_token (op->token);
      free (op->token);
      break;
    default:
      break;
    }
}
static char *
string_of_object (const struct object *op)
{
char *str;
const struct token_char *p;
char *q;
int n;
if (!(op->type == t_symbol || op->type == t_string))
abort ();
n = op->token->charcount;
str = (char *) xmalloc (n + 1);
q = str;
for (p = op->token->chars; n > 0; p++, n--)
*q++ = p->ch;
*q = '\0';
return str;
}
/* Flag table of the current extraction run; set by extract_lisp and
   looked up by read_object for the symbol in function position.  */
static flag_context_list_table_ty *flag_context_list_table;
/* Read the next object from the input and describe it in *OP.

   Whitespace and comments (';' line comments and '#|...|#' block
   comments) are skipped; comment text is handed to xgettext_comment_add
   through comment_line_end.  Lists are read recursively.  When the first
   element of a list is a symbol found in the keywords table, string
   arguments at the registered positions are recorded as messages.  When
   extract_all is set, every string literal is recorded.

   OUTER_CONTEXT is the flag context of the position this object occupies;
   it is combined with the per-argument flags of the function symbol to
   form the context of each list argument.

   On return OP->type is set.  OP->token is allocated only for t_symbol
   and t_string; the caller releases it with free_object.  Invalid input
   is tolerated without error messages.  */
static void
read_object (struct object *op, flag_context_ty outer_context)
{
/* Each iteration reads one character; 'continue' restarts here after
   whitespace, comments and other things that produce no object.  */
for (;;)
{
struct char_syntax curr;
read_char_syntax (&curr);
switch (curr.scode)
{
case syntax_eof:
op->type = t_eof;
return;
case syntax_whitespace:
if (curr.ch == '\n')
/* At end of line, if a non-comment object was seen after the last
   comment, the accumulated comments are reset.
   NOTE(review): presumably so that only comments immediately preceding
   a message get attached to it -- confirm against
   xgettext_comment_reset.  */
if (last_non_comment_line > last_comment_line)
xgettext_comment_reset ();
continue;
case syntax_illegal:
op->type = t_other;
return;
case syntax_single_esc:
case syntax_multi_esc:
case syntax_constituent:
/* Start of a token: read it and then decide what it denotes.  */
op->token = (struct token *) xmalloc (sizeof (struct token));
read_token (op->token, &curr);
last_non_comment_line = line_number;
/* A single unescaped dot.  */
if (!op->token->with_escape
&& op->token->charcount == 1
&& op->token->chars[0].attribute == a_dot)
{
free_token (op->token);
free (op->token);
op->type = t_dot;
return;
}
/* A number; its value is of no interest, so the token is dropped.  */
{
int base = read_base;
if (is_number (op->token, &base) != n_none)
{
free_token (op->token);
free (op->token);
op->type = t_other;
return;
}
}
/* Every other token is taken to be a symbol.  */
case_convert_token (op->token);
op->type = t_symbol;
return;
case syntax_t_macro:
case syntax_nt_macro:
/* A macro character; dispatch on it.  */
switch (curr.ch)
{
case '(':
{
/* Index of the list element being read; 0 is the function position.  */
int arg = 0;
flag_context_list_iterator_ty context_iter;
/* Argument positions of the singular and plural strings, taken from
   the keywords table; 0 means none.  */
int argnum1 = 0;
int argnum2 = 0;
/* The message created for the singular string, to which the plural
   form is attached later.  */
message_ty *plural_mp = NULL;
for (;; arg++)
{
struct object inner;
flag_context_ty inner_context;
if (arg == 0)
inner_context = null_context;
else
inner_context =
inherited_context (outer_context,
flag_context_list_iterator_advance (
&context_iter));
read_object (&inner, inner_context);
/* End of the list.  */
if (inner.type == t_close)
{
op->type = t_other;
last_non_comment_line = line_number;
return;
}
/* End of input inside a list: leave the loop and return t_other.  */
if (inner.type == t_eof)
break;
if (arg == 0)
{
/* Function position.  */
if (inner.type == t_symbol)
{
char *symbol_name = string_of_object (&inner);
int i;
int prefix_len;
void *keyword_value;
/* Find the end of the last package marker, so that the keyword
   lookup uses the symbol name without any package prefix.  */
i = inner.token->charcount;
while (i > 0
&& inner.token->chars[i-1].attribute != a_pack_m)
i--;
prefix_len = i;
if (find_entry (&keywords,
symbol_name + prefix_len,
strlen (symbol_name + prefix_len),
&keyword_value)
== 0)
{
/* Unpack argnum1 + (argnum2 << 10), as stored by x_lisp_keyword.  */
argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
argnum2 = (int) (long) keyword_value >> 10;
}
/* The flag lookup uses the full name, including any package prefix.  */
context_iter =
flag_context_list_iterator (
flag_context_list_table_lookup (
flag_context_list_table,
symbol_name, strlen (symbol_name)));
free (symbol_name);
}
else
context_iter = null_context_list_iterator;
}
else
{
/* Argument positions: record a string found at a registered
   position.  */
if (arg == argnum1)
{
if (inner.type == t_string)
{
lex_pos_ty pos;
message_ty *mp;
pos.file_name = logical_file_name;
pos.line_number = inner.line_number_at_start;
mp = remember_a_message (mlp, string_of_object (&inner),
inner_context, &pos);
if (argnum2 > 0)
plural_mp = mp;
}
}
else if (arg == argnum2)
{
if (inner.type == t_string && plural_mp != NULL)
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = inner.line_number_at_start;
remember_a_message_plural (plural_mp, string_of_object (&inner),
inner_context, &pos);
}
}
}
free_object (&inner);
}
}
/* Reached only through the 'break' on end of input above.  */
op->type = t_other;
last_non_comment_line = line_number;
return;
case ')':
/* Report the end of a list to the caller.  */
op->type = t_close;
last_non_comment_line = line_number;
return;
case ',':
{
/* Consume the '@' or '.' of ",@" and ",.", if present.  */
int c = do_getc ();
if (c != EOF && c != '@' && c != '.')
do_ungetc (c);
}
/* Falls through: like quote and backquote, read and discard the
   following object.  */
case '\'':
case '`':
{
struct object inner;
read_object (&inner, null_context);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case ';':
{
/* Line comment.  Leading semicolons are skipped, as is white space
   at the beginning of the comment text.  */
bool all_semicolons = true;
last_comment_line = line_number;
comment_start ();
for (;;)
{
int c = do_getc ();
if (c == EOF || c == '\n')
break;
if (c != ';')
all_semicolons = false;
if (!all_semicolons)
{
if (!(buflen == 0 && (c == ' ' || c == '\t')))
comment_add (c);
}
}
comment_line_end (0);
continue;
}
case '"':
{
/* String literal.  A backslash makes the following character
   literal.  Only the ch field of each stored character is set.  An
   unterminated string is accepted as it stands.  */
op->token = (struct token *) xmalloc (sizeof (struct token));
init_token (op->token);
op->line_number_at_start = line_number;
for (;;)
{
int c = do_getc ();
if (c == EOF)
break;
if (c == '"')
break;
if (c == '\\')
{
c = do_getc ();
if (c == EOF)
break;
}
grow_token (op->token);
op->token->chars[op->token->charcount++].ch = c;
}
op->type = t_string;
if (extract_all)
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = op->line_number_at_start;
remember_a_message (mlp, string_of_object (op),
null_context, &pos);
}
last_non_comment_line = line_number;
return;
}
case '#':
{
/* Dispatching macro character.  Skip the optional decimal digits
   that follow it, then dispatch on the next character.  */
int c;
for (;;)
{
c = do_getc ();
if (c == EOF)
{
op->type = t_other;
return;
}
if (!(c >= '0' && c <= '9'))
break;
}
switch (c)
{
case '(':
case '"':
/* Push the character back so that the object read below starts
   with it.  */
do_ungetc (c);
/* Falls through.  */
case '\'':
case ':':
case '.':
case ',':
case 'A': case 'a':
case 'C': case 'c':
case 'P': case 'p':
case 'S': case 's':
{
/* The following object is read and discarded.  */
struct object inner;
read_object (&inner, null_context);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '|':
{
/* Block comment, which may nest.  DEPTH counts the nested "#|"
   openers that are still unclosed.  The text is handed over line by
   line; white space at the beginning of each line is skipped.  */
int depth = 0;
int c;
comment_start ();
c = do_getc ();
for (;;)
{
if (c == EOF)
break;
if (c == '|')
{
c = do_getc ();
if (c == EOF)
break;
if (c == '#')
{
if (depth == 0)
{
/* End of the outermost comment.  */
comment_line_end (0);
break;
}
depth--;
comment_add ('|');
comment_add ('#');
c = do_getc ();
}
else
comment_add ('|');
}
else if (c == '#')
{
c = do_getc ();
if (c == EOF)
break;
comment_add ('#');
if (c == '|')
{
depth++;
comment_add ('|');
c = do_getc ();
}
}
else
{
if (!(buflen == 0 && (c == ' ' || c == '\t')))
comment_add (c);
if (c == '\n')
{
/* Drop the newline that was just added and emit the line.  */
comment_line_end (1);
comment_start ();
}
c = do_getc ();
}
}
if (c == EOF)
{
/* Unterminated block comment.  */
op->type = t_eof;
return;
}
last_comment_line = line_number;
/* Continues the outer loop of read_object.  */
continue;
}
case '\\':
{
/* "#\": read the rest as a token, with the backslash as its first
   character, and discard it.  */
struct token token;
struct char_syntax first;
first.ch = '\\';
first.scode = syntax_single_esc;
read_token (&token, &first);
free_token (&token);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case 'B': case 'b':
case 'O': case 'o':
case 'X': case 'x':
case 'R': case 'r':
case '*':
{
/* The token that follows is read and discarded.  */
struct token token;
read_token (&token, NULL);
free_token (&token);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '=':
/* "#n=": ignored; go on with the object that follows.  */
continue;
case '#':
/* "#n#": counts as an object of no interest.  */
op->type = t_other;
last_non_comment_line = line_number;
return;
case '+':
case '-':
{
/* "#+" and "#-": the object that follows is read and discarded;
   the one after it is then read normally.  */
struct object inner;
read_object (&inner, null_context);
free_object (&inner);
continue;
}
default:
op->type = t_other;
last_non_comment_line = line_number;
return;
}
/* Not reached: every case above returns or continues.  */
abort ();
}
default:
/* Not reached: syntax_code_of classifies no other character as a
   macro character.  */
abort ();
}
default:
/* Not reached: all syntax codes are handled above.  */
abort ();
}
}
}
/* Scan the Lisp source on stream F and add the messages found to the
   first message list of MDLP.

   REAL_FILENAME is used in the read error message, LOGICAL_FILENAME in
   the positions of the extracted messages.  FLAG_TABLE supplies the
   format-string flags of known functions.

   LOGICAL_FILENAME is duplicated and the copy is not freed here.
   NOTE(review): presumably the recorded message positions keep pointing
   at it -- confirm against remember_a_message.  */
void
extract_lisp (FILE *f,
              const char *real_filename, const char *logical_filename,
              flag_context_list_table_ty *flag_table,
              msgdomain_list_ty *mdlp)
{
  struct object toplevel_object;

  /* Set up the scanner state.  */
  flag_context_list_table = flag_table;
  mlp = mdlp->item[0]->messages;

  fp = f;
  real_file_name = real_filename;
  logical_file_name = xstrdup (logical_filename);
  line_number = 1;

  last_comment_line = -1;
  last_non_comment_line = -1;

  init_keywords ();

  /* Read top-level objects until end of input.  An object other than
     t_eof (for instance a stray closing parenthesis) merely restarts
     the reader.  */
  for (;;)
    {
      read_object (&toplevel_object, null_context);
      if (toplevel_object.type == t_eof)
        break;
      free_object (&toplevel_object);
      if (feof (fp))
        break;
    }

  /* Shut the scanner down.  */
  fp = NULL;
  real_file_name = NULL;
  logical_file_name = NULL;
  line_number = 0;
}