#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "message.h"
#include "xgettext.h"
#include "x-sh.h"
#include "error.h"
#include "xalloc.h"
#include "exit.h"
#include "hash.h"
#include "gettext.h"
#define _(s) gettext(s)
/* Set by x_sh_extract_all; when true, read_command records every string
   word as a message.  */
static bool extract_all = false;
/* Keyword table filled by x_sh_keyword.  Each entry maps a keyword name to
   its two argument numbers packed as argnum1 + (argnum2 << 10).  */
static hash_table keywords;
/* True until x_sh_keyword (NULL) is called or init_keywords has installed
   the built-in keywords.  */
static bool default_keywords = true;
/* Switch the extractor into "extract everything" mode: from now on every
   string word seen by read_command is recorded as a message.  */
void
x_sh_extract_all ()
{
  extract_all = true;
}
/* Register a keyword specification.
   NAME == NULL turns off the built-in keyword set.  Otherwise NAME is
   handed to split_keywordspec, which yields the end of the keyword proper
   and up to two argument numbers; the keyword is then stored in the
   keywords table with both numbers packed into the value.  */
void
x_sh_keyword (const char *name)
{
  const char *spec_end;
  const char *first_colon;
  int singular_arg;
  int plural_arg;

  if (name == NULL)
    {
      default_keywords = false;
      return;
    }

  /* The table is created lazily on the first registration.  */
  if (keywords.table == NULL)
    init_hash (&keywords, 100);

  split_keywordspec (name, &spec_end, &singular_arg, &plural_arg);

  /* A colon located inside the keyword part makes the specification
     unusable; it is silently dropped.  */
  first_colon = strchr (name, ':');
  if (first_colon != NULL && first_colon < spec_end)
    return;

  /* No explicit argument number means "first argument".  */
  if (singular_arg == 0)
    singular_arg = 1;

  insert_entry (&keywords, name, spec_end - name,
                (void *) (long) (singular_arg + (plural_arg << 10)));
}
/* Install the built-in keywords, unless they were disabled through
   x_sh_keyword (NULL) or have been installed already.  */
static void
init_keywords ()
{
  static const char *const builtin_specs[] =
    {
      "gettext",
      "ngettext:1,2",
      "eval_gettext",
      "eval_ngettext:1,2"
    };
  size_t i;

  if (!default_keywords)
    return;

  for (i = 0; i < sizeof builtin_specs / sizeof builtin_specs[0]; i++)
    x_sh_keyword (builtin_specs[i]);

  /* Make sure the defaults are registered only once.  */
  default_keywords = false;
}
/* Record the format-string flags of the built-in shell keywords through
   xgettext_record_flag, in a fixed order.  */
void
init_flag_table_sh ()
{
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-sh-format",
      "ngettext:1:pass-sh-format",
      "ngettext:2:pass-sh-format",
      "eval_gettext:1:sh-format",
      "eval_ngettext:1:sh-format",
      "eval_ngettext:2:sh-format"
    };
  size_t i;

  for (i = 0; i < sizeof flag_specs / sizeof flag_specs[0]; i++)
    xgettext_record_flag (flag_specs[i]);
}
/* State of the file currently being scanned; set up and torn down by
   extract_sh.  */
/* Name used in read-error messages (see do_getc).  */
static const char *real_file_name;
/* Name stored in the positions of recorded messages.  */
static char *logical_file_name;
/* Current line; maintained by the getc/ungetc functions of each phase.  */
static int line_number;
/* Stream the characters are read from.  */
static FILE *fp;
/* Phase 0: fetch one character from fp.
   Bumps line_number for every newline read.  A read error on the stream is
   reported through error () with EXIT_FAILURE.  Returns the character, or
   EOF.  */
static int
do_getc ()
{
  int ch = getc (fp);

  if (ch == '\n')
    line_number++;
  else if (ch == EOF && ferror (fp))
    error (EXIT_FAILURE, errno, _("\
error while reading \"%s\""), real_file_name);

  return ch;
}
/* Phase 0: give character C back to fp, undoing the line count that
   do_getc applied if C is a newline.  */
static void
do_ungetc (int c)
{
  if ('\n' == c)
    --line_number;

  ungetc (c, fp);
}
/* Pushback stack of phase 1 (room for 4 characters) and the number of
   entries currently on it.  Filled by phase1_ungetc, drained by
   phase1_getc.  */
static int phase1_pushback[4];
static int phase1_pushback_length;
/* Phase 1: deliver the next character with every backslash-newline pair
   removed.  Characters previously handed to phase1_ungetc are delivered
   first.  Returns the character, or EOF.  */
static int
phase1_getc ()
{
  if (phase1_pushback_length != 0)
    {
      int pushed = phase1_pushback[--phase1_pushback_length];

      /* phase1_ungetc decremented the line count for a newline; redo it.  */
      if (pushed == '\n')
        ++line_number;
      return pushed;
    }

  for (;;)
    {
      int first;
      int second;

      first = do_getc ();
      if (first != '\\')
        return first;

      second = do_getc ();
      if (second == '\n')
        /* Line continuation: swallow both characters and read on.  */
        continue;

      /* A lone backslash: give the following character back (EOF cannot
         be pushed back) and deliver the backslash itself.  */
      if (second != EOF)
        do_ungetc (second);
      return '\\';
    }
}
/* Phase 1: push character C back so that the next phase1_getc call
   delivers it again.
   EOF is ignored.  For a newline the line count is decremented here and
   incremented again when the character is re-read.
   The pushback stack has a fixed capacity; exceeding it is a programming
   error and aborts instead of writing past the end of the array.  */
static void
phase1_ungetc (int c)
{
  if (c == EOF)
    return;

  if (c == '\n')
    --line_number;

  /* Guard the fixed-size stack.  The cast also catches a (corrupted)
     negative length.  */
  if ((size_t) phase1_pushback_length
      >= sizeof phase1_pushback / sizeof phase1_pushback[0])
    abort ();

  phase1_pushback[phase1_pushback_length++] = c;
}
/* A growable character buffer holding the text of a word.  The contents
   are not NUL-terminated; use string_of_token to obtain a C string.  */
struct token
{
/* Number of chars allocated for `chars'.  */
int allocated;
/* Number of chars currently stored.  */
int charcount;
/* Heap buffer; released by free_token.  */
char *chars;
};
/* Prepare *TP as an empty token with a freshly allocated buffer of
   10 characters.  */
static inline void
init_token (struct token *tp)
{
  enum { initial_capacity = 10 };

  tp->charcount = 0;
  tp->allocated = initial_capacity;
  tp->chars = (char *) xmalloc (initial_capacity * sizeof (char));
}
/* Release the character buffer owned by *TP.  The struct itself is not
   freed.  */
static inline void
free_token (struct token *tp)
{
  char *owned = tp->chars;

  free (owned);
}
/* Make sure *TP has room for at least one more character, doubling the
   buffer when it is exactly full.  */
static inline void
grow_token (struct token *tp)
{
  /* Still room left: nothing to do.  */
  if (tp->charcount != tp->allocated)
    return;

  tp->allocated *= 2;
  tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
}
/* Return a freshly allocated, NUL-terminated copy of the characters
   stored in *TP.  The caller owns the result.  */
static char *
string_of_token (const struct token *tp)
{
  const int len = tp->charcount;
  char *copy = (char *) xmalloc (len + 1);

  memcpy (copy, tp->chars, len);
  copy[len] = '\0';
  return copy;
}
/* Message list that extracted messages are added to; set by extract_sh.  */
static message_list_ty *mlp;
/* Accumulator for the text of the comment line being read: heap buffer,
   its allocated size, and the number of characters in use.  The buffer is
   reused from one comment to the next (see comment_start).  */
static char *buffer;
static size_t bufmax;
static size_t buflen;
/* Begin accumulating a new comment line.  Only the fill level is reset;
   the buffer allocation is kept for reuse.  */
static inline void
comment_start ()
{
  buflen = 0;
}
/* Append character C to the comment accumulator, enlarging the buffer
   first when it is full.  */
static inline void
comment_add (int c)
{
  if (buflen >= bufmax)
    {
      /* Roughly double; the + 10 also handles the initial empty buffer.  */
      bufmax = 2 * bufmax + 10;
      buffer = xrealloc (buffer, bufmax);
    }

  buffer[buflen] = c;
  buflen++;
}
/* Finish the comment line being accumulated: strip trailing blanks and
   tabs, NUL-terminate the text and hand it to xgettext_comment_add.  */
static inline void
comment_line_end ()
{
  /* Drop trailing spaces and tabs.  */
  for (; buflen >= 1; --buflen)
    {
      char last = buffer[buflen - 1];

      if (last != ' ' && last != '\t')
        break;
    }

  /* Ensure there is room for the terminating NUL.  */
  if (buflen >= bufmax)
    {
      bufmax = 2 * bufmax + 10;
      buffer = xrealloc (buffer, bufmax);
    }

  buffer[buflen] = '\0';
  xgettext_comment_add (buffer);
}
/* Line of the most recent comment and of the most recent non-comment word;
   read_word compares them at a newline to decide whether the accumulated
   comments are reset.  */
static int last_comment_line;
static int last_non_comment_line;
/* Number of backquote levels currently open.  */
static unsigned int nested_backquotes;
/* Bit i is set when a double quote was open at the moment backquote level i
   was entered (see saw_opening_backquote / saw_closing_backquote).  */
static unsigned int open_doublequotes_mask;
/* Whether a double quote is open at the current backquote level.  */
static bool open_doublequote;
/* Whether a single quote is open at the current backquote level.  */
static bool open_singlequote;
/* Update the quoting state for an opening backquote: remember whether a
   double quote was open at the current level, then enter a new level that
   starts with no double quote open.  Must not be called while a single
   quote is open.  */
static inline void
saw_opening_backquote ()
{
  if (open_singlequote)
    abort ();

  if (open_doublequote)
    {
      /* Save the open double quote in the bit of the level being left.  */
      open_doublequotes_mask |= (unsigned int) 1 << nested_backquotes;
      open_doublequote = false;
    }

  ++nested_backquotes;
}
/* Update the quoting state for a closing backquote: leave the innermost
   backquote level and restore the double-quote state that was saved when
   that level was entered.  Any single quote is considered closed.  */
static inline void
saw_closing_backquote ()
{
  unsigned int level = --nested_backquotes;
  unsigned int level_bit = (unsigned int) 1 << level;

  open_doublequote = (open_doublequotes_mask & level_bit) != 0;
  /* Clear the bit of this level and of all deeper levels.  */
  open_doublequotes_mask &= level_bit - 1;
  open_singlequote = false;
}
/* Record that a double quote was opened.  Aborts if any quote, single or
   double, is already open.  */
static inline void
saw_opening_doublequote ()
{
  bool some_quote_open = open_singlequote || open_doublequote;

  if (some_quote_open)
    abort ();

  open_doublequote = true;
}
/* Record that the open double quote was closed.  Aborts unless exactly a
   double quote (and no single quote) is open.  */
static inline void
saw_closing_doublequote ()
{
  bool only_doublequote_open = open_doublequote && !open_singlequote;

  if (!only_doublequote_open)
    abort ();

  open_doublequote = false;
}
/* Record that a single quote was opened.  Aborts if any quote, single or
   double, is already open.  */
static inline void
saw_opening_singlequote ()
{
  bool some_quote_open = open_doublequote || open_singlequote;

  if (some_quote_open)
    abort ();

  open_singlequote = true;
}
/* Record that the open single quote was closed.  Aborts unless exactly a
   single quote (and no double quote) is open.  */
static inline void
saw_closing_singlequote ()
{
  bool only_singlequote_open = open_singlequote && !open_doublequote;

  if (!only_singlequote_open)
    abort ();

  open_singlequote = false;
}
/* Classification of a word as produced by read_word.  */
enum word_type
{
/* A word whose text was collected into a token.  */
t_string,
/* A word containing $ expansions or a backquoted command, or an operator
   other than ';' and redirections.  */
t_other,
/* A newline or ';'.  */
t_separator,
/* A redirection operator, possibly preceded by digits.  */
t_redirect,
/* A closing backquote, reported only when the caller is looking for one.  */
t_backquote,
/* A closing parenthesis, reported only when the caller is looking for one.  */
t_paren,
/* End of input.  */
t_eof
};
/* Result of read_word.  */
struct word
{
enum word_type type;
/* Text of the word; allocated and valid only when type == t_string.  */
struct token *token;
/* Line on which the word started; set by read_word once it begins
   collecting characters.  */
int line_number_at_start;
};
/* Release the memory owned by *WP.  Only string words own a token; every
   other word type has nothing to free.  */
static inline void
free_word (struct word *wp)
{
  if (wp->type != t_string)
    return;

  free_token (wp->token);
  free (wp->token);
}
static char *
string_of_word (const struct word *wp)
{
char *str;
int n;
if (!(wp->type == t_string))
abort ();
n = wp->token->charcount;
str = (char *) xmalloc (n + 1);
memcpy (str, wp->token->chars, n);
str[n] = '\0';
return str;
}
/* Tell whether C is a space, a tab or a newline.  */
static inline bool
is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
      return true;
    default:
      return false;
    }
}
/* Tell whether C is one of the characters | & ; < > ( ) that start a shell
   operator.  */
static inline bool
is_operator_start (int c)
{
  switch (c)
    {
    case '|':
    case '&':
    case ';':
    case '<':
    case '>':
    case '(':
    case ')':
      return true;
    default:
      return false;
    }
}
/* Values returned by phase2_getc in addition to plain characters and EOF.
   QUOTED (c) marks character c as quoted by shifting it above the
   unsigned char range; the two backquote codes lie above the QUOTED range,
   so all three kinds are distinct from every plain character.  */
#define QUOTED(c) (UCHAR_MAX + 1 + (c))
#define OPENING_BACKQUOTE (2 * (UCHAR_MAX + 1) + '`')
#define CLOSING_BACKQUOTE (3 * (UCHAR_MAX + 1) + '`')
/* Pushback stack of phase 2 (room for 4 values) and the number of entries
   currently on it.  Filled by phase2_ungetc, drained by phase2_getc.  */
static int phase2_pushback[4];
static int phase2_pushback_length;
/* Forward declaration: phase2_getc uses phase2_ungetc, which is defined
   after it.  */
static void phase2_ungetc (int c);
/* Phase 2: deliver the next character, classified according to the current
   quoting state (open_singlequote, open_doublequote, nested_backquotes).
   Returns EOF, a plain character, QUOTED (c) for a character protected by
   quotes or backslashes, or OPENING_BACKQUOTE / CLOSING_BACKQUOTE for a
   backquote.  Values handed to phase2_ungetc are delivered first, without
   being classified again.  This function only reads the quoting state; the
   caller updates it through the saw_* functions.  */
static int
phase2_getc ()
{
int c;
/* Deliver pushed-back values first; redo the line count that
   phase2_ungetc undid for a newline.  */
if (phase2_pushback_length)
{
c = phase2_pushback[--phase2_pushback_length];
if (c == '\n')
++line_number;
return c;
}
c = phase1_getc ();
if (c == EOF)
return c;
/* A single quote is literal inside double quotes, a quote delimiter
   otherwise.  */
if (c == '\'')
return (open_doublequote ? QUOTED (c) : c);
if (!open_singlequote)
{
/* Outside single quotes, '"' and '$' keep their special meaning.  */
if (c == '"' || c == '$')
return c;
/* An unescaped backquote closes the innermost open level if there is one,
   and opens a new level otherwise.  */
if (c == '`')
return (nested_backquotes > 0 ? CLOSING_BACKQUOTE : OPENING_BACKQUOTE);
}
if (c == '\\')
{
/* One level of backslash processing for every open backquote level, plus
   one more when not inside single quotes.  */
unsigned int debackslahify =
nested_backquotes + (open_singlequote ? 0 : 1);
/* Number of consecutive backslashes treated as one literal backslash:
   2 to the power of debackslahify.  */
unsigned int expected_count =
(unsigned int) 1 << debackslahify;
/* Number of backslashes read so far, including the first one.  */
unsigned int count;
for (count = 1; count < expected_count; count++)
{
c = phase1_getc ();
if (c != '\\')
break;
}
/* A complete run of backslashes yields a single backslash.  */
if (count == expected_count)
return '\\';
/* The run was cut short by C, which is not a backslash (it may be EOF).
   What happens next depends on C, on COUNT and on the quoting state.  */
if (c == '\'')
{
if (!open_singlequote && count > (expected_count >> 1))
{
phase1_ungetc (c);
return '\\';
}
else
return (open_doublequote ? QUOTED (c) : c);
}
else if (c == '"')
{
if (open_singlequote)
{
if (count > open_doublequotes_mask)
{
/* NOTE(review): this is the only place in this function that pushes back
   with phase2_ungetc; the sibling branches use phase1_ungetc.  Confirm
   that bypassing the classification of C on re-read is intended.  */
phase2_ungetc (c);
return '\\';
}
else
return QUOTED (c);
}
else
{
if (count > open_doublequotes_mask)
return QUOTED (c);
else
return c;
}
}
else if (c == '`')
{
/* One backslash short of a complete run: the backquote is delivered as a
   plain character.  */
if (count == expected_count - 1)
return c;
else
if (nested_backquotes > 0 && !open_singlequote
&& count >= (expected_count >> 2))
return OPENING_BACKQUOTE;
else
return CLOSING_BACKQUOTE;
}
else if (c == '$')
{
if (open_singlequote)
return QUOTED (c);
if (count >= (expected_count >> 1))
return QUOTED (c);
else
return c;
}
else
{
/* Any other character, including EOF.  COUNT is at least 1 here, so the
   "count > 0" tests below always succeed.  */
if (open_doublequote || open_singlequote)
{
if (count > 0)
{
/* Inside quotes the backslash stays; C is read again on the next call.
   phase1_ungetc ignores EOF.  */
phase1_ungetc (c);
return '\\';
}
else
return QUOTED (c);
}
else
{
if (count > (expected_count >> 1))
{
phase1_ungetc (c);
return '\\';
}
else if (count > 0)
return QUOTED (c);
else
return c;
}
}
}
/* Every remaining character is quoted exactly when a quote is open.  */
return (open_singlequote || open_doublequote ? QUOTED (c) : c);
}
/* Phase 2: push value C back so that the next phase2_getc call delivers it
   again, unchanged.
   EOF is ignored.  For a newline the line count is decremented here and
   incremented again when the value is re-read.
   The pushback stack has a fixed capacity; exceeding it is a programming
   error and aborts instead of writing past the end of the array.  */
static void
phase2_ungetc (int c)
{
  if (c == EOF)
    return;

  if (c == '\n')
    --line_number;

  /* Guard the fixed-size stack.  The cast also catches a (corrupted)
     negative length.  */
  if ((size_t) phase2_pushback_length
      >= sizeof phase2_pushback / sizeof phase2_pushback[0])
    abort ();

  phase2_pushback[phase2_pushback_length++] = c;
}
/* Flag table of the file being scanned; set by extract_sh and looked up by
   read_command for each command name.  */
static flag_context_list_table_ty *flag_context_list_table;
/* Forward declaration: read_word calls read_command_list for nested
   command substitutions, and read_command_list is defined after it.  */
static enum word_type read_command_list (int looking_for,
flag_context_ty outer_context);
/* Read the next word of the shell script and store its classification in
   *WP.
   LOOKING_FOR is CLOSING_BACKQUOTE or ')' when the caller is inside a
   command substitution; a matching closer found at the start of a word is
   then reported as t_backquote or t_paren.  Any other value disables this.
   CONTEXT is passed on to nested command lists and to messages recorded
   for the $"..." syntax.
   When the result is t_string, WP->token holds the collected text and the
   caller must release it with free_word.  For every other type no token is
   left allocated.  */
static void
read_word (struct word *wp, int looking_for, flag_context_ty context)
{
int c;
bool all_unquoted_digits;
/* Skip leading whitespace and comments.  A newline ends the search and is
   reported as a separator.  */
do
{
c = phase2_getc ();
if (c == '#')
{
/* A comment up to the end of the line.  Its text is read with
   phase1_getc, i.e. without quote processing, and leading blanks and tabs
   are dropped.  */
last_comment_line = line_number;
comment_start ();
for (;;)
{
c = phase1_getc ();
if (c == EOF || c == '\n')
break;
if (!(buflen == 0 && (c == ' ' || c == '\t')))
comment_add (c);
}
comment_line_end ();
}
if (c == '\n')
{
/* Forget the accumulated comments if a non-comment word was seen after
   the last comment line.  */
if (last_non_comment_line > last_comment_line)
xgettext_comment_reset ();
wp->type = t_separator;
return;
}
}
while (is_whitespace (c));
if (c == EOF)
{
wp->type = t_eof;
return;
}
/* A redirection operator at the start of a word: consume its second
   character if it forms a two-character operator, plus the '-' of "<<-".  */
if (c == '<' || c == '>')
{
int c2 = phase2_getc ();
if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
{
if (c == '<' && c2 == '<')
{
int c3 = phase2_getc ();
if (c3 != '-')
phase2_ungetc (c3);
}
}
else
phase2_ungetc (c2);
wp->type = t_redirect;
return;
}
/* The closer the caller is waiting for.  */
if (looking_for == CLOSING_BACKQUOTE && c == CLOSING_BACKQUOTE)
{
saw_closing_backquote ();
wp->type = t_backquote;
last_non_comment_line = line_number;
return;
}
if (looking_for == ')' && c == ')')
{
wp->type = t_paren;
last_non_comment_line = line_number;
return;
}
/* Any other operator character: ';' separates commands, the rest is
   reported as t_other.  Only this one character is consumed.  */
if (is_operator_start (c))
{
wp->type = (c == ';' ? t_separator : t_other);
return;
}
/* An ordinary word.  It starts out as t_string and is downgraded to
   t_other as soon as a part is seen whose value is not a constant.  */
wp->type = t_string;
wp->token = (struct token *) xmalloc (sizeof (struct token));
init_token (wp->token);
wp->line_number_at_start = line_number;
all_unquoted_digits = true;
for (;; c = phase2_getc ())
{
if (c == EOF)
break;
/* Unquoted digits directly followed by '<' or '>' form a redirection
   with a leading number.  Quoted digits arrive as QUOTED values and
   therefore fail the digit test below.  */
if (all_unquoted_digits && (c == '<' || c == '>'))
{
int c2 = phase2_getc ();
if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
{
if (c == '<' && c2 == '<')
{
int c3 = phase2_getc ();
if (c3 != '-')
phase2_ungetc (c3);
}
}
else
phase2_ungetc (c2);
wp->type = t_redirect;
free_token (wp->token);
free (wp->token);
last_non_comment_line = line_number;
return;
}
all_unquoted_digits = all_unquoted_digits && (c >= '0' && c <= '9');
if (c == '$')
{
int c2 = phase2_getc ();
if (c2 == '(')
{
int c3 = phase2_getc ();
if (c3 == '(')
{
/* "$((": skip up to the parenthesis that brings the nesting depth back
   to zero, or up to EOF.  The contents are not examined.  */
unsigned int depth = 2;
do
{
c = phase2_getc ();
if (c == '(')
depth++;
else if (c == ')')
if (--depth == 0)
break;
}
while (c != EOF);
}
else
{
/* "$(": a nested command list ending at the matching ')'.  */
phase2_ungetc (c3);
read_command_list (')', context);
}
}
else if (c2 == '\'' && !open_singlequote)
{
/* "$'...'": a single-quoted string with backslash escape sequences.
   Its characters are appended to the token; the word keeps its type.  */
saw_opening_singlequote ();
for (;;)
{
c = phase2_getc ();
if (c == EOF)
break;
if (c == '\'')
{
saw_closing_singlequote ();
break;
}
if (c == '\\')
{
/* NOTE(review): while open_singlequote is set, phase2_getc returns
   ordinary characters as QUOTED (c), but the cases below compare against
   plain character constants.  Confirm that the escape sequences are
   actually recognized here.  */
c = phase2_getc ();
switch (c)
{
default:
/* Not an escape sequence: keep the backslash and re-read the character.  */
phase2_ungetc (c);
c = '\\';
break;
case '\\':
break;
case '\'':
break;
case 'a':
c = '\a';
break;
case 'b':
c = '\b';
break;
case 'e':
c = 0x1b;
break;
case 'f':
c = '\f';
break;
case 'n':
c = '\n';
break;
case 'r':
c = '\r';
break;
case 't':
c = '\t';
break;
case 'v':
c = '\v';
break;
case 'x':
/* "\x" followed by one or two hexadecimal digits.  Without any digit the
   backslash is kept and the 'x' is re-read.  */
c = phase2_getc ();
if ((c >= '0' && c <= '9')
|| (c >= 'A' && c <= 'F')
|| (c >= 'a' && c <= 'f'))
{
int n;
if (c >= '0' && c <= '9')
n = c - '0';
else if (c >= 'A' && c <= 'F')
n = 10 + c - 'A';
else if (c >= 'a' && c <= 'f')
n = 10 + c - 'a';
else
abort ();
c = phase2_getc ();
if ((c >= '0' && c <= '9')
|| (c >= 'A' && c <= 'F')
|| (c >= 'a' && c <= 'f'))
{
if (c >= '0' && c <= '9')
n = n * 16 + c - '0';
else if (c >= 'A' && c <= 'F')
n = n * 16 + 10 + c - 'A';
else if (c >= 'a' && c <= 'f')
n = n * 16 + 10 + c - 'a';
else
abort ();
}
else
phase2_ungetc (c);
c = n;
}
else
{
phase2_ungetc (c);
phase2_ungetc ('x');
c = '\\';
}
break;
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
/* A backslash followed by one to three octal digits.  */
{
int n = c - '0';
c = phase2_getc ();
if (c >= '0' && c <= '7')
{
n = n * 8 + c - '0';
c = phase2_getc ();
if (c >= '0' && c <= '7')
n = n * 8 + c - '0';
else
phase2_ungetc (c);
}
else
phase2_ungetc (c);
c = n;
}
break;
}
}
/* The cast to unsigned char strips the QUOTED offset, if any.  */
if (wp->type == t_string)
{
grow_token (wp->token);
wp->token->chars[wp->token->charcount++] =
(unsigned char) c;
}
}
/* Continue with the rest of the word without changing its type.  */
continue;
}
else if (c2 == '"' && !open_doublequote)
{
/* $"...": the text up to the closing double quote is recorded as a
   message of its own, and a deprecation warning is printed.  */
lex_pos_ty pos;
struct token string;
saw_opening_doublequote ();
pos.file_name = logical_file_name;
pos.line_number = line_number;
init_token (&string);
for (;;)
{
c = phase2_getc ();
if (c == EOF)
break;
if (c == '"')
{
saw_closing_doublequote ();
break;
}
grow_token (&string);
string.chars[string.charcount++] = (unsigned char) c;
}
remember_a_message (mlp, string_of_token (&string),
context, &pos);
free_token (&string);
error_with_progname = false;
error (0, 0, _("%s:%lu: warning: the syntax $\"...\" is deprecated due to security reasons; use eval_gettext instead"),
pos.file_name, (unsigned long) pos.line_number);
error_with_progname = true;
}
else
phase2_ungetc (c2);
/* Except for $'...', which continued above, a '$' makes the word
   non-constant.  */
wp->type = t_other;
continue;
}
/* Quote characters only toggle the quoting state; they add nothing to the
   token.  */
if (c == '\'')
{
if (!open_singlequote)
{
saw_opening_singlequote ();
}
else
{
saw_closing_singlequote ();
}
continue;
}
if (c == '"')
{
if (!open_doublequote)
{
saw_opening_doublequote ();
}
else
{
saw_closing_doublequote ();
}
continue;
}
/* A backquoted command: scan it as a nested command list; the word becomes
   non-constant.  */
if (c == OPENING_BACKQUOTE)
{
saw_opening_backquote ();
read_command_list (CLOSING_BACKQUOTE, context);
wp->type = t_other;
continue;
}
/* The word ends at a closing backquote, or at unquoted whitespace or an
   unquoted operator character.  */
if (c == CLOSING_BACKQUOTE)
break;
if (!open_singlequote && !open_doublequote
&& (is_whitespace (c) || is_operator_start (c)))
break;
/* An ordinary character of the word.  The cast to unsigned char strips the
   QUOTED offset, if any.  */
if (wp->type == t_string)
{
grow_token (wp->token);
wp->token->chars[wp->token->charcount++] = (unsigned char) c;
}
}
/* Give the terminating character back; phase2_ungetc ignores EOF.  */
phase2_ungetc (c);
/* Only string words keep their token.  */
if (wp->type != t_string)
{
free_token (wp->token);
free (wp->token);
}
last_non_comment_line = line_number;
}
/* Read one command, i.e. a sequence of words up to a separator, a closer
   the caller is looking for, or end of input, and record the messages
   found in it.
   LOOKING_FOR is passed on to read_word.  OUTER_CONTEXT is the flag
   context of the enclosing construct; it is combined with the per-argument
   flags of the command name to form the context of each argument.
   Returns the type of the word that ended the command: t_separator,
   t_backquote, t_paren or t_eof.  */
static enum word_type
read_command (int looking_for, flag_context_ty outer_context)
{
/* Position of the next word: 0 for a command name, n for its n-th
   argument.  */
int arg = 0;
/* True when the previous word was a redirection operator, so that the
   current word is its operand.  */
bool arg_of_redirect = false;
/* Iterator over the flags of the current command name's arguments.  It is
   assigned whenever a word in command-name position is processed, before
   any word with arg != 0 reads it.  */
flag_context_list_iterator_ty context_iter;
/* Argument numbers of the singular and plural message of the current
   keyword.  Both are negative while no keyword call is being tracked.  */
int argnum1 = -1;
int argnum2 = -1;
/* Message recorded for argnum1, kept until the plural form is seen.  */
message_ty *plural_mp = NULL;
for (;;)
{
struct word inner;
flag_context_ty inner_context;
if (arg == 0)
inner_context = null_context;
else
inner_context =
inherited_context (outer_context,
flag_context_list_iterator_advance (
&context_iter));
read_word (&inner, looking_for, inner_context);
/* These word types end the command.  None of them owns a token, so there
   is nothing to free.  */
if (inner.type == t_separator
|| inner.type == t_backquote || inner.type == t_paren
|| inner.type == t_eof)
return inner.type;
/* In extract-all mode every string word is recorded, whatever its
   position.  */
if (extract_all)
{
if (inner.type == t_string)
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = inner.line_number_at_start;
remember_a_message (mlp, string_of_word (&inner),
inner_context, &pos);
}
}
/* A redirection operator and the word after it do not count as
   arguments.  */
if (arg_of_redirect)
{
arg_of_redirect = false;
}
else if (inner.type == t_redirect)
{
arg_of_redirect = true;
}
else
{
if (argnum1 < 0 && argnum2 < 0)
{
/* No keyword call is being tracked: treat this word as a command name.  */
arg = 0;
if (inner.type == t_string)
{
char *function_name = string_of_word (&inner);
void *keyword_value;
if (find_entry (&keywords,
function_name, strlen (function_name),
&keyword_value)
== 0)
{
/* Unpack the value stored by x_sh_keyword: argnum1 in the low 10 bits,
   argnum2 in the bits above.  */
argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
argnum2 = (int) (long) keyword_value >> 10;
}
context_iter =
flag_context_list_iterator (
flag_context_list_table_lookup (
flag_context_list_table,
function_name, strlen (function_name)));
free (function_name);
}
else
context_iter = null_context_list_iterator;
}
else
{
/* An argument of a tracked keyword call.  */
if (arg == argnum1)
{
if (inner.type == t_string)
{
lex_pos_ty pos;
message_ty *mp;
pos.file_name = logical_file_name;
pos.line_number = inner.line_number_at_start;
mp = remember_a_message (mlp, string_of_word (&inner),
inner_context, &pos);
/* Keep the message only when a plural argument is expected.  */
if (argnum2 > 0)
plural_mp = mp;
}
}
else if (arg == argnum2)
{
if (inner.type == t_string && plural_mp != NULL)
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = inner.line_number_at_start;
remember_a_message_plural (plural_mp, string_of_word (&inner),
inner_context, &pos);
}
}
/* Both argument positions have been passed: stop tracking.  The next
   counted word is then treated as a command name again.  */
if (arg >= argnum1 && arg >= argnum2)
{
argnum1 = -1;
argnum2 = -1;
plural_mp = NULL;
}
}
arg++;
}
free_word (&inner);
}
}
/* Read commands one after another until one of them ends with something
   other than a separator.
   LOOKING_FOR and OUTER_CONTEXT are passed unchanged to read_command.
   Returns the type of the word that ended the list.  */
static enum word_type
read_command_list (int looking_for, flag_context_ty outer_context)
{
  enum word_type ended_by;

  do
    ended_by = read_command (looking_for, outer_context);
  while (ended_by == t_separator);

  return ended_by;
}
/* Scan the shell script on stream F and add the messages found to the
   first message list of MDLP.
   REAL_FILENAME is used in read-error messages, LOGICAL_FILENAME in the
   positions of the recorded messages.  FLAG_TABLE supplies the
   per-argument flags of known commands.  */
void
extract_sh (FILE *f,
            const char *real_filename, const char *logical_filename,
            flag_context_list_table_ty *flag_table,
            msgdomain_list_ty *mdlp)
{
  /* Where to read from and where to store results.  */
  fp = f;
  mlp = mdlp->item[0]->messages;
  flag_context_list_table = flag_table;

  /* Position bookkeeping.  */
  real_file_name = real_filename;
  logical_file_name = xstrdup (logical_filename);
  line_number = 1;
  last_comment_line = last_non_comment_line = -1;

  /* Start with no quote and no backquote open.  */
  open_singlequote = open_doublequote = false;
  open_doublequotes_mask = 0;
  nested_backquotes = 0;

  init_keywords ();

  /* '\0' never matches a closer, so the whole file is consumed.  */
  read_command_list ('\0', null_context);

  /* Drop the per-file state.  The duplicated logical file name is not
     freed here; presumably the recorded message positions keep pointing
     at it (TODO confirm against remember_a_message).  */
  line_number = 0;
  logical_file_name = NULL;
  real_file_name = NULL;
  fp = NULL;
}