#include "m4.h"
#ifdef ENABLE_CHANGEWORD
#include "regex.h"
#endif
/* Kinds of entry that can be found on the input stack.  The value is
   stored in the `type' member of an input block and selects which
   member of its union is in use.  */
typedef enum input_type
{
  INPUT_FILE,                   /* characters come from a stdio stream */
  INPUT_STRING,                 /* characters come from a string in memory */
  INPUT_MACRO                   /* entry carries a builtin function */
}
input_type;
/* One entry of the input stack.  Entries are linked through `prev',
   and `type' tells which member of the union `u' is meaningful.  */
typedef struct input_block
{
  struct input_block *prev;     /* entry below this one, or NULL */
  input_type type;              /* INPUT_FILE, INPUT_STRING or INPUT_MACRO */
  union
    {
      /* INPUT_STRING */
      struct
        {
          char *string;         /* next unread character of the text */
        }
      u_s;

      /* INPUT_FILE */
      struct
        {
          FILE *file;           /* stream being read */
          const char *name;     /* value of current_file to restore on pop */
          int lineno;           /* value of current_line to restore on pop */
          int out_lineno;       /* value of output_current_line to restore */
          boolean advance_line; /* value of start_of_input_line to restore */
        }
      u_f;

      /* INPUT_MACRO */
      struct
        {
          builtin_func *func;   /* builtin function carried by the entry */
          boolean traced;       /* trace flag handed on with the function */
        }
      u_m;
    }
  u;
}
input_block;
/* Name of the file currently being read.  Set to "NONE" by input_init (),
   replaced by push_file () and restored by pop_input ().  */
const char *current_file;
/* Line number within current_file.  next_char () increments it on the
   first read that follows a newline taken from a file.  */
int current_line;
/* Obstack on which next_token () assembles the text of each token.  */
static struct obstack token_stack;
/* Obstack holding the input blocks pushed during normal processing.  */
static struct obstack input_stack;
/* Obstack holding the blocks and text queued by push_wrapup ().  */
static struct obstack wrapup_stack;
/* Obstack on which new input blocks are allocated: &input_stack after
   input_init (), &wrapup_stack once pop_wrapup () has succeeded.  */
static struct obstack *current_input;
/* Base object of token_stack; next_token () frees back to this point
   before building a new token.  */
static char *token_bottom;
/* Top of the input stack, or NULL when no input is left.  */
static input_block *isp;
/* Top of the list of wrapup blocks, or NULL when there are none.  */
static input_block *wsp;
/* Block under construction between push_string_init () and
   push_string_finish (); NULL at all other times.  */
static input_block *next;
/* Set when next_char () returns a newline read from a file; the following
   call to next_char () then clears it and increments current_line.  */
static boolean start_of_input_line;
/* Pseudo-characters returned by peek_input () and next_char ().  Both lie
   outside the range 0..255 of values that getc () returns for a byte.  */
#define CHAR_EOF 256
#define CHAR_MACRO 257
/* Current quote and comment delimiters, with their lengths.  They are
   initialised by input_init () and changed by set_quotes () and
   set_comment ().  */
STRING rquote;
STRING lquote;
STRING bcomm;
STRING ecomm;
#ifdef ENABLE_CHANGEWORD
/* Pattern describing the default word syntax.  set_word_regexp () does not
   compile it; it only compares against it to select the default path.  */
#define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
/* NUL-terminated list of the characters that can begin a word under the
   current non-default pattern; allocated and filled by set_word_regexp ().  */
static char *word_start;
/* Compiled form of the current non-default word pattern.  */
static struct re_pattern_buffer word_regexp;
/* Nonzero while the default word syntax is in effect.  */
static int default_word_regexp;
/* Match registers filled in by the re_search () calls in this file.  */
static struct re_registers regs;
#endif
/* push_file () pushes the open stream FP on the input stack, so that
   further input is read from it.  TITLE is copied and becomes the current
   file name; the line count restarts at 1.  The file name, line number,
   output line number and start-of-line flag that were current before the
   call are stored in the new block so that pop_input () can restore
   them.  */
void
push_file (FILE *fp, const char *title)
{
  input_block *block;

  /* An unfinished push_string_init () is discarded.  */
  if (next != NULL)
    {
      obstack_free (current_input, next);
      next = NULL;
    }

  if (debug_level & DEBUG_TRACE_INPUT)
    DEBUG_MESSAGE1 ("input read from %s", title);

  /* The block must be allocated before the name copy below: both live on
     the same obstack, and freeing the block releases the copy as well.  */
  block = (input_block *) obstack_alloc (current_input, sizeof *block);
  block->type = INPUT_FILE;
  block->prev = isp;
  block->u.u_f.file = fp;

  /* Remember the state of the enclosing input.  */
  block->u.u_f.name = current_file;
  block->u.u_f.lineno = current_line;
  block->u.u_f.out_lineno = output_current_line;
  block->u.u_f.advance_line = start_of_input_line;

  /* Switch the global position to the start of the new file.  */
  current_file = obstack_copy0 (current_input, title, strlen (title));
  current_line = 1;
  output_current_line = -1;

  isp = block;
}
/* push_macro () pushes an INPUT_MACRO block carrying the builtin function
   FUNC and its TRACED flag.  peek_input () and next_char () report such a
   block as CHAR_MACRO.  */
void
push_macro (builtin_func *func, boolean traced)
{
  input_block *block;

  /* An unfinished push_string_init () is discarded.  */
  if (next != NULL)
    {
      obstack_free (current_input, next);
      next = NULL;
    }

  block = (input_block *) obstack_alloc (current_input, sizeof *block);
  block->prev = isp;
  block->type = INPUT_MACRO;
  block->u.u_m.traced = traced;
  block->u.u_m.func = func;
  isp = block;
}
/* push_string_init () starts the construction of a string input block.
   It allocates the block, records it in `next', and returns the obstack
   on which the caller is to grow the text.  The push is completed by
   push_string_finish ().  Calling it again while a block is still pending
   is an internal error and aborts.  */
struct obstack *
push_string_init (void)
{
  if (next == NULL)
    {
      next = (input_block *) obstack_alloc (current_input, sizeof *next);
      next->type = INPUT_STRING;
    }
  else
    {
      M4ERROR ((warning_status, 0,
                "INTERNAL ERROR: Recursive push_string!"));
      abort ();
    }

  return current_input;
}
const char *
push_string_finish (void)
{
const char *ret = NULL;
if (next == NULL)
return NULL;
if (obstack_object_size (current_input) > 0)
{
obstack_1grow (current_input, '\0');
next->u.u_s.string = obstack_finish (current_input);
next->prev = isp;
isp = next;
ret = isp->u.u_s.string;
}
else
obstack_free (current_input, next);
next = NULL;
return ret;
}
/* push_wrapup () queues a copy of the string S as a string block on the
   wrapup stack.  Queued blocks become input only after pop_wrapup ().  */
void
push_wrapup (const char *s)
{
  input_block *entry;

  entry = (input_block *) obstack_alloc (&wrapup_stack, sizeof *entry);
  entry->type = INPUT_STRING;
  entry->u.u_s.string = obstack_copy0 (&wrapup_stack, s, strlen (s));
  entry->prev = wsp;
  wsp = entry;
}
/* pop_input () removes the top block of the input stack and releases its
   storage.  For a file block the stream is closed and the file name, line
   number and start-of-line flag saved by push_file () are restored.  The
   saved output line number is restored only when the stack becomes empty;
   otherwise output_current_line is set to -1.  A block of unknown type is
   an internal error and aborts.  */
static void
pop_input (void)
{
  input_block *below = isp->prev;

  if (isp->type == INPUT_FILE)
    {
      if (debug_level & DEBUG_TRACE_INPUT)
        DEBUG_MESSAGE2 ("input reverted to %s, line %d",
                        isp->u.u_f.name, isp->u.u_f.lineno);

      fclose (isp->u.u_f.file);

      current_file = isp->u.u_f.name;
      current_line = isp->u.u_f.lineno;
      start_of_input_line = isp->u.u_f.advance_line;
      output_current_line = (below != NULL) ? -1 : isp->u.u_f.out_lineno;
    }
  else if (isp->type != INPUT_STRING && isp->type != INPUT_MACRO)
    {
      M4ERROR ((warning_status, 0,
                "INTERNAL ERROR: Input stack botch in pop_input ()"));
      abort ();
    }

  /* Freeing the block also frees everything allocated after it.  */
  obstack_free (current_input, isp);
  next = NULL;
  isp = below;
}
/* pop_wrapup () makes the blocks queued by push_wrapup () the current
   input stack and directs later allocations to the wrapup obstack.  It
   returns TRUE if there was anything queued, FALSE otherwise.  */
boolean
pop_wrapup (void)
{
  if (wsp != NULL)
    {
      isp = wsp;
      wsp = NULL;
      current_input = &wrapup_stack;
      return TRUE;
    }

  return FALSE;
}
/* init_macro_token () fills TD as a TOKEN_FUNC token from the INPUT_MACRO
   block on top of the input stack: the function pointer and the trace
   flag are copied from the block.  Calling it when the top block is of any
   other type is an internal error and aborts.  */
static void
init_macro_token (token_data *td)
{
  if (isp->type == INPUT_MACRO)
    {
      TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
      TOKEN_DATA_FUNC (td) = isp->u.u_m.func;
      TOKEN_DATA_FUNC_TRACED (td) = isp->u.u_m.traced;
      return;
    }

  M4ERROR ((warning_status, 0,
            "INTERNAL ERROR: Bad call to init_macro_token ()"));
  abort ();
}
/* peek_input () returns the next character of input without consuming
   it.  Exhausted string and file blocks are popped on the way.  The result
   is CHAR_EOF when the input stack is empty and CHAR_MACRO when the top
   block is a macro block.
   NOTE(review): a character taken from a string block is returned as a
   plain `char' value, while one taken from a file is whatever getc ()
   returns; the two may differ for bytes above 127 -- confirm whether
   callers depend on that.  */
int
peek_input (void)
{
  int ch;

  while (isp != NULL)
    {
      if (isp->type == INPUT_MACRO)
        return CHAR_MACRO;

      if (isp->type == INPUT_STRING)
        {
          ch = isp->u.u_s.string[0];
          if (ch != '\0')
            return ch;
        }
      else if (isp->type == INPUT_FILE)
        {
          ch = getc (isp->u.u_f.file);
          if (ch != EOF)
            {
              /* Put it back so that the character stays unread.  */
              ungetc (ch, isp->u.u_f.file);
              return ch;
            }
        }
      else
        {
          M4ERROR ((warning_status, 0,
                    "INTERNAL ERROR: Input stack botch in peek_input ()"));
          abort ();
        }

      /* The top block has nothing left; continue with the one below.  */
      pop_input ();
    }

  return CHAR_EOF;
}
/* next_char () consumes and returns the next character of input.  If the
   previous character delivered from a file was a newline, current_line is
   incremented first.  Exhausted string and file blocks are popped on the
   way.  The result is CHAR_EOF when the input stack is empty; a macro
   block is popped and reported as CHAR_MACRO.  */
static int
next_char (void)
{
  int ch;

  if (start_of_input_line)
    {
      current_line++;
      start_of_input_line = FALSE;
    }

  while (isp != NULL)
    {
      if (isp->type == INPUT_MACRO)
        {
          pop_input ();
          return CHAR_MACRO;
        }

      if (isp->type == INPUT_STRING)
        {
          ch = *isp->u.u_s.string++;
          if (ch != '\0')
            return ch;
        }
      else if (isp->type == INPUT_FILE)
        {
          ch = getc (isp->u.u_f.file);
          if (ch != EOF)
            {
              /* The line count is bumped lazily, on the next call.  */
              if (ch == '\n')
                start_of_input_line = TRUE;
              return ch;
            }
        }
      else
        {
          M4ERROR ((warning_status, 0,
                    "INTERNAL ERROR: Input stack botch in next_char ()"));
          abort ();
        }

      /* The top block has nothing left; continue with the one below.  */
      pop_input ();
    }

  return CHAR_EOF;
}
/* skip_line () discards input up to and including the next newline, or
   up to the end of input if no newline is found.  */
void
skip_line (void)
{
  for (;;)
    {
      int c = next_char ();

      if (c == CHAR_EOF || c == '\n')
        break;
    }
}
/* match_input () checks whether the input begins with the string S.  On
   success the matched characters have been consumed and 1 is returned.
   On failure 0 is returned; characters consumed during a partial match
   are pushed back as a string block, so the input reads as before.  S is
   expected to be non-empty.  */
static int
match_input (const char *s)
{
  int consumed;                 /* characters of S read off the input */
  struct obstack *pushback;

  if (peek_input () != *s)
    return 0;                   /* first character differs */
  (void) next_char ();

  if (s[1] == '\0')
    return 1;                   /* one-character string */

  consumed = 1;
  while (peek_input () == s[consumed])
    {
      (void) next_char ();
      consumed++;
      if (s[consumed] == '\0')
        return 1;               /* whole string seen */
    }

  /* Mismatch after a partial match: restore what was consumed.  */
  pushback = push_string_init ();
  obstack_grow (pushback, s, consumed);
  push_string_finish ();
  return 0;
}
/* MATCH (ch, s) tests whether the delimiter string S starts at the
   character CH, where CH has already been read from the input.  It is
   true when CH equals the first character of S and either S is one
   character long or match_input () finds the rest of S on the input.
   In the latter case the rest of S is consumed and CH is reassigned to
   the value of peek_input ().  The test `(ch) != '\0'' rejects a NUL
   character, so an empty S never matches.
   CH must be a modifiable lvalue.  CH and S are evaluated more than once,
   so neither argument may have side effects.  */
#define MATCH(ch, s) \
((s)[0] == (ch) \
&& (ch) != '\0' \
&& ((s)[1] == '\0' \
|| (match_input ((s) + 1) ? (ch) = peek_input (), 1 : 0)))
/* input_init () initialises the module: the three obstacks, the empty
   input and wrapup stacks, the current position ("NONE", line 0), and the
   default quote and comment delimiters.  With ENABLE_CHANGEWORD the word
   pattern is set from user_word_regexp if that is given, otherwise to the
   default.  */
void
input_init (void)
{
  obstack_init (&token_stack);
  obstack_init (&input_stack);
  obstack_init (&wrapup_stack);

  /* Put a base object on the token obstack for next_token () to free
     back to.  */
  obstack_1grow (&token_stack, '\0');
  token_bottom = obstack_finish (&token_stack);

  current_input = &input_stack;
  isp = NULL;
  wsp = NULL;
  next = NULL;

  current_file = "NONE";
  current_line = 0;
  start_of_input_line = FALSE;

  lquote.string = xstrdup (DEF_LQUOTE);
  rquote.string = xstrdup (DEF_RQUOTE);
  bcomm.string = xstrdup (DEF_BCOMM);
  ecomm.string = xstrdup (DEF_ECOMM);

  lquote.length = strlen (lquote.string);
  rquote.length = strlen (rquote.string);
  bcomm.length = strlen (bcomm.string);
  ecomm.length = strlen (ecomm.string);

#ifdef ENABLE_CHANGEWORD
  set_word_regexp (user_word_regexp ? user_word_regexp
                   : DEFAULT_WORD_REGEXP);
#endif
}
/* set_quotes () replaces the quote delimiters.  LQ and RQ are copied; a
   NULL argument selects the corresponding default, DEF_LQUOTE or
   DEF_RQUOTE.  The previous strings are freed.  */
void
set_quotes (const char *lq, const char *rq)
{
  const char *new_left = (lq != NULL) ? lq : DEF_LQUOTE;
  const char *new_right = (rq != NULL) ? rq : DEF_RQUOTE;

  xfree (lquote.string);
  xfree (rquote.string);

  lquote.string = xstrdup (new_left);
  lquote.length = strlen (lquote.string);

  rquote.string = xstrdup (new_right);
  rquote.length = strlen (rquote.string);
}
/* set_comment () replaces the comment delimiters.  BC and EC are copied;
   a NULL argument selects the corresponding default, DEF_BCOMM or
   DEF_ECOMM.  The previous strings are freed.  */
void
set_comment (const char *bc, const char *ec)
{
  const char *new_begin = (bc != NULL) ? bc : DEF_BCOMM;
  const char *new_end = (ec != NULL) ? ec : DEF_ECOMM;

  xfree (bcomm.string);
  xfree (ecomm.string);

  bcomm.string = xstrdup (new_begin);
  bcomm.length = strlen (bcomm.string);

  ecomm.string = xstrdup (new_end);
  ecomm.length = strlen (ecomm.string);
}
#ifdef ENABLE_CHANGEWORD
/* set_word_regexp () selects the syntax of words.  If REGEXP is exactly
   DEFAULT_WORD_REGEXP, the default path in next_token () is selected and
   nothing is compiled.  Otherwise REGEXP is compiled into word_regexp and
   word_start is rebuilt as the list of single characters (codes 1..255)
   that the pattern matches on their own, i.e. that can begin a word.

   If REGEXP does not compile, an error is reported and the default word
   syntax is selected.  The failed compilation has already overwritten
   word_regexp, and word_start may never have been allocated, so the
   non-default path in next_token () must not be left enabled.  */
void
set_word_regexp (const char *regexp)
{
  int i;
  int count;
  char test[2];
  const char *msg;

  if (!strcmp (regexp, DEFAULT_WORD_REGEXP))
    {
      default_word_regexp = TRUE;
      return;
    }

  msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
  if (msg != NULL)
    {
      M4ERROR ((warning_status, 0,
                "Bad regular expression `%s': %s", regexp, msg));
      default_word_regexp = TRUE;
      return;
    }

  /* Room for the 255 candidate characters and the terminating NUL.  */
  if (word_start == NULL)
    word_start = xmalloc (256);

  count = 0;
  test[1] = '\0';
  for (i = 1; i < 256; i++)
    {
      test[0] = i;
      if (re_search (&word_regexp, test, 1, 0, 0, &regs) >= 0)
        word_start[count++] = test[0];
    }
  word_start[count] = '\0';

  /* Enable the new pattern only now that all of its state is valid.  */
  default_word_regexp = FALSE;
}
#endif
/* next_token () reads the next token from the input and returns its type.

   TOKEN_EOF     no input is left; TD is not touched.
   TOKEN_MACDEF  the input held a macro block; TD is filled in by
                 init_macro_token ().
   TOKEN_STRING  a comment, stored with its delimiters, or a quoted
                 string, stored without the outermost quotes.
   TOKEN_WORD    a word, according to the default syntax or, with
                 ENABLE_CHANGEWORD, to the current word pattern.
   TOKEN_SIMPLE  any other single character.

   For the last three types TD becomes a TOKEN_TEXT token whose text lives
   on the token obstack; that storage is reused by the next call.  With
   ENABLE_CHANGEWORD the original text is stored as well; it differs from
   the text only when the word pattern has a subexpression.  */
token_type
next_token (token_data *td)
{
  int ch;
  int quote_level;
  token_type type;
#ifdef ENABLE_CHANGEWORD
  int startpos;
  char *orig_text = NULL;
#endif

  /* Release the previous token and set up a fresh base object.  */
  obstack_free (&token_stack, token_bottom);
  obstack_1grow (&token_stack, '\0');
  token_bottom = obstack_finish (&token_stack);

  ch = peek_input ();
  if (ch == CHAR_EOF)
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "next_token -> EOF\n");
#endif
      return TOKEN_EOF;
    }
  if (ch == CHAR_MACRO)
    {
      /* The token must be filled in before next_char () pops the block.  */
      init_macro_token (td);
      (void) next_char ();
      return TOKEN_MACDEF;
    }

  (void) next_char ();
  if (MATCH (ch, bcomm.string))
    {
      /* Comment: copied verbatim, delimiters included.  */
      obstack_grow (&token_stack, bcomm.string, bcomm.length);
      while ((ch = next_char ()) != CHAR_EOF && !MATCH (ch, ecomm.string))
        obstack_1grow (&token_stack, ch);
      if (ch != CHAR_EOF)
        obstack_grow (&token_stack, ecomm.string, ecomm.length);
      type = TOKEN_STRING;
    }
  /* The <ctype.h> functions accept only EOF and values representable as
     unsigned char.  CH may be a negative `char' taken from a string block,
     or CHAR_MACRO when peeked, hence the casts and the explicit test.  */
#ifdef ENABLE_CHANGEWORD
  else if (default_word_regexp
           && (isalpha ((unsigned char) ch) || ch == '_'))
#else
  else if (isalpha ((unsigned char) ch) || ch == '_')
#endif
    {
      obstack_1grow (&token_stack, ch);
      while ((ch = peek_input ()) != CHAR_EOF
             && ch != CHAR_MACRO
             && (isalnum ((unsigned char) ch) || ch == '_'))
        {
          obstack_1grow (&token_stack, ch);
          (void) next_char ();
        }
      type = TOKEN_WORD;
    }
#ifdef ENABLE_CHANGEWORD
  else if (!default_word_regexp && strchr (word_start, ch))
    {
      obstack_1grow (&token_stack, ch);
      while (1)
        {
          ch = peek_input ();
          if (ch == CHAR_EOF)
            break;
          /* Append the candidate and see if the whole text still matches.  */
          obstack_1grow (&token_stack, ch);
          startpos = re_search (&word_regexp, obstack_base (&token_stack),
                                obstack_object_size (&token_stack), 0, 0,
                                &regs);
          if (startpos != 0 ||
              regs.end [0] != obstack_object_size (&token_stack))
            {
              /* It does not: overwrite the candidate and leave it unread.  */
              *(((char *) obstack_base (&token_stack)
                 + obstack_object_size (&token_stack)) - 1) = '\0';
              break;
            }
          (void) next_char ();
        }

      obstack_1grow (&token_stack, '\0');
      orig_text = obstack_finish (&token_stack);

      /* The token text is the first subexpression if the pattern has one,
         otherwise the whole match.  */
      if (regs.start[1] != -1)
        obstack_grow (&token_stack,orig_text + regs.start[1],
                      regs.end[1] - regs.start[1]);
      else
        obstack_grow (&token_stack, orig_text,regs.end[0]);

      type = TOKEN_WORD;
    }
#endif
  else if (!MATCH (ch, lquote.string))
    {
      type = TOKEN_SIMPLE;
      obstack_1grow (&token_stack, ch);
    }
  else
    {
      /* Quoted string: nested quotes are kept, the outermost dropped.  */
      quote_level = 1;
      while (1)
        {
          ch = next_char ();
          if (ch == CHAR_EOF)
            M4ERROR ((EXIT_FAILURE, 0,
                      "ERROR: EOF in string"));

          if (MATCH (ch, rquote.string))
            {
              if (--quote_level == 0)
                break;
              obstack_grow (&token_stack, rquote.string, rquote.length);
            }
          else if (MATCH (ch, lquote.string))
            {
              quote_level++;
              obstack_grow (&token_stack, lquote.string, lquote.length);
            }
          else
            obstack_1grow (&token_stack, ch);
        }
      type = TOKEN_STRING;
    }

  obstack_1grow (&token_stack, '\0');

  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
  TOKEN_DATA_TEXT (td) = obstack_finish (&token_stack);
#ifdef ENABLE_CHANGEWORD
  if (orig_text == NULL)
    orig_text = TOKEN_DATA_TEXT (td);
  TOKEN_DATA_ORIG_TEXT (td) = orig_text;
#endif
#ifdef DEBUG_INPUT
  fprintf (stderr, "next_token -> %d (%s)\n", type, TOKEN_DATA_TEXT (td));
#endif
  return type;
}
#ifdef DEBUG_INPUT
/* print_token () writes a description of the token (T, TD) to stderr,
   prefixed by the label S.  Text tokens are printed as their kind followed
   by the quoted text.  TOKEN_MACDEF prints the function address and
   TOKEN_EOF prints "eof"; neither carries text, so TOKEN_DATA_TEXT is not
   read for them.  Any other type is printed by number only.  */
static void
print_token (const char *s, token_type t, token_data *td)
{
  fprintf (stderr, "%s: ", s);
  switch (t)
    {
    case TOKEN_SIMPLE:
      fprintf (stderr, "char:");
      break;

    case TOKEN_WORD:
      fprintf (stderr, "word:");
      break;

    case TOKEN_STRING:
      fprintf (stderr, "string:");
      break;

    case TOKEN_MACDEF:
      /* A pointer must not be passed to %x; print it as an integer.  */
      fprintf (stderr, "macro: 0x%lx\n",
               (unsigned long) TOKEN_DATA_FUNC (td));
      return;

    case TOKEN_EOF:
      fprintf (stderr, "eof\n");
      return;

    default:
      fprintf (stderr, "unknown token type %d\n", (int) t);
      return;
    }

  fprintf (stderr, "\t\"%s\"\n", TOKEN_DATA_TEXT (td));
}
static void
lex_debug (void)
{
token_type t;
token_data td;
while ((t = next_token (&td)) != NULL)
print_token ("lex", t, &td);
}
#endif