#include <stdio.h>
#include "as.h"
/* If the compiler does not claim ISO C conformance (__STDC__ != 1),
   define `const' away, unless it is already defined as a macro.  */
#if (__STDC__ != 1)
#ifndef const
#define const
#endif
#endif
#ifdef TC_M68K
/* Copy of the m68k_mri argument most recently given to do_scrub_begin.  */
static int scrub_m68k_mri;
/* Directive text that do_scrub_chars matches one character at a time
   through mri_state; the final '0' is also allowed to match '1'.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without TC_M68K the flag is a compile-time zero, so tests of it
   are constant.  */
#define scrub_m68k_mri 0
#endif
#if defined TC_ARM && defined OBJ_ELF
/* Directive text that do_scrub_chars matches one character at a time
   through symver_state.  */
static const char symver_pseudo[] = ".symver";
/* Position reached inside symver_pseudo, or NULL when no match is in
   progress.  */
static const char * symver_state;
#endif
/* Character classification table, indexed by an unsigned character
   value.  Entries are 0 (no special class) or one of the LEX_IS_*
   codes below; do_scrub_begin fills it in.  */
static char lex[256];
/* Characters that do_scrub_begin always marks as
   LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
/* Class codes stored in lex[].  The value 7 is not used.  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
/* Given to '/' when nothing else has claimed it, so that do_scrub_chars
   can look for a following '*'.  */
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11
#ifdef TC_V850
/* Given to '-'; do_scrub_chars checks for a second '-'.  */
#define LEX_IS_DOUBLEDASH_1ST 12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
/* Given to '|'; do_scrub_chars checks for a second '|'.  */
#define LEX_IS_DOUBLEBAR_1ST 13
#endif
#define LEX_IS_PARALLEL_SEPARATOR 14
/* Class tests.  Each one indexes lex[] directly with C, so C must be
   in the range 0..255; in particular it must not be EOF.  */
#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
/* Translate the character following a backslash in a one-character
   quote; defined further down.  */
static int process_escape (int);
void
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
{
const char *p;
int c;
lex[' '] = LEX_IS_WHITESPACE;
lex['\t'] = LEX_IS_WHITESPACE;
lex['\r'] = LEX_IS_WHITESPACE;
lex['\n'] = LEX_IS_NEWLINE;
lex[':'] = LEX_IS_COLON;
#ifdef TC_M68K
scrub_m68k_mri = m68k_mri;
if (! m68k_mri)
#endif
{
lex['"'] = LEX_IS_STRINGQUOTE;
#if ! defined (TC_HPPA) && ! defined (TC_I370)
lex['\''] = LEX_IS_ONECHAR_QUOTE;
#endif
#ifdef SINGLE_QUOTE_STRINGS
lex['\''] = LEX_IS_STRINGQUOTE;
#endif
}
for (p = symbol_chars; *p; ++p)
lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
for (c = 128; c < 256; ++c)
lex[c] = LEX_IS_SYMBOL_COMPONENT;
#ifdef tc_symbol_chars
for (p = tc_symbol_chars; *p; ++p)
lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
#endif
#ifndef tc_comment_chars
#define tc_comment_chars comment_chars
#endif
for (p = tc_comment_chars; *p; p++)
lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
for (p = line_comment_chars; *p; p++)
lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
for (p = line_separator_chars; *p; p++)
lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
#ifdef tc_parallel_separator_chars
for (p = tc_parallel_separator_chars; *p; p++)
lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
#endif
if (lex['/'] == 0)
lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
#ifdef TC_M68K
if (m68k_mri)
{
lex['\''] = LEX_IS_STRINGQUOTE;
lex[';'] = LEX_IS_COMMENT_START;
lex['*'] = LEX_IS_LINE_COMMENT_START;
lex['!'] = LEX_IS_LINE_COMMENT_START;
}
#endif
#ifdef TC_V850
lex['-'] = LEX_IS_DOUBLEDASH_1ST;
#endif
#ifdef DOUBLEBAR_PARALLEL
lex['|'] = LEX_IS_DOUBLEBAR_1ST;
#endif
#ifdef TC_D30V
lex['-'] = LEX_IS_SYMBOL_COMPONENT;
#endif
}
/* Scrubber state that persists between calls of do_scrub_chars.  */

/* Current state; selects the case taken at the top of the
   do_scrub_chars loop.  */
static int state;
/* State to resume once the current string, comment or pending output
   has been dealt with.  */
static int old_state;
/* Next character of the text being emitted while state is -1.  */
static char *out_string;
/* Holds the decimal text that do_scrub_chars formats for a
   one-character quote.  */
static char out_buf[20];
/* Count of newlines that were consumed without being output; each one
   is paid back by emitting a later newline twice.  */
static int add_newlines;
/* Unread input left over when the output buffer filled, or NULL.  */
static char *saved_input;
/* Number of bytes at saved_input.  */
static int saved_input_len;
/* Buffer that the caller's GET function fills with raw input.  */
static char input_buffer[32 * 1024];
/* Position reached inside mri_pseudo, or NULL when no match is in
   progress.  */
static const char *mri_state;
/* Last input character accepted by the mri_pseudo matcher; compared
   with '1' to choose the argument for do_scrub_begin.  */
static char mri_last_ch;
/* Snapshot of the scrubber's file-level state, filled in by app_push
   and restored by app_pop.  Each member mirrors the file-level
   variable of the same name.  */
struct app_save
{
int state;
int old_state;
char * out_string;
/* Same size as the file-level out_buf, which is copied here whole.  */
char out_buf[sizeof (out_buf)];
int add_newlines;
/* Heap copy of the pending input, or NULL if there was none.  */
char * saved_input;
/* Byte count of saved_input; app_pop reads it only when saved_input
   is not NULL.  */
int saved_input_len;
#ifdef TC_M68K
int scrub_m68k_mri;
#endif
const char * mri_state;
char mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
const char * symver_state;
#endif
};
/* Save the scrubber state so that a nested input can be scrubbed, and
   reset the scrubber for that nested input.

   Returns a heap-allocated snapshot (a struct app_save disguised as a
   char *) which the caller must later hand to app_pop; app_pop
   restores the state and frees the snapshot.  Any pending input is
   copied to the heap because input_buffer is reused by the nested
   input.  */
char *
app_push (void)
{
  struct app_save *saved;

  saved = (struct app_save *) xmalloc (sizeof (*saved));
  saved->state = state;
  saved->old_state = old_state;
  saved->out_string = out_string;
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
  saved->add_newlines = add_newlines;
  if (saved_input == NULL)
    {
      saved->saved_input = NULL;
      /* Keep the snapshot fully initialised even though app_pop does
         not read the length in this case.  */
      saved->saved_input_len = 0;
    }
  else
    {
      saved->saved_input = xmalloc (saved_input_len);
      memcpy (saved->saved_input, saved_input, saved_input_len);
      saved->saved_input_len = saved_input_len;
    }
#ifdef TC_M68K
  saved->scrub_m68k_mri = scrub_m68k_mri;
#endif
  saved->mri_state = mri_state;
  saved->mri_last_ch = mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  saved->symver_state = symver_state;
#endif

  /* Start the nested input from a clean state.  Newlines still owed to
     the outer input have been saved above and must not be emitted
     into the nested input, so the counter is cleared as well.  */
  state = 0;
  saved_input = NULL;
  add_newlines = 0;

  return (char *) saved;
}
/* Restore the scrubber state from ARG, a snapshot made by app_push,
   and release the snapshot.  Pending input recorded in the snapshot is
   copied back into input_buffer and its heap copy is freed.  */
void
app_pop (char *arg)
{
  struct app_save *saved = (struct app_save *) arg;

  /* Pending input first: it is the only part needing more than a
     plain assignment.  */
  if (saved->saved_input != NULL)
    {
      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
      saved_input = input_buffer;
      saved_input_len = saved->saved_input_len;
      free (saved->saved_input);
    }
  else
    saved_input = NULL;

  state = saved->state;
  old_state = saved->old_state;
  out_string = saved->out_string;
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
  add_newlines = saved->add_newlines;

#ifdef TC_M68K
  scrub_m68k_mri = saved->scrub_m68k_mri;
#endif
  mri_state = saved->mri_state;
  mri_last_ch = saved->mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  symver_state = saved->symver_state;
#endif

  free (arg);
}
/* Return the character denoted by a backslash followed by CH.
   The letters b, f, n, r and t give the matching control character;
   every other value, including both quote characters, is returned
   unchanged.  */
static int
process_escape (int ch)
{
  /* escape_values[i] is the translation of escape_letters[i].  */
  static const char escape_letters[] = "bfnrt";
  static const char escape_values[] = "\b\f\n\r\t";
  unsigned int i;

  for (i = 0; i < sizeof escape_letters - 1; i++)
    {
      if (ch == escape_letters[i])
        return escape_values[i];
    }

  return ch;
}
/* Scrub assembler input into the caller's buffer.

   GET is called as (*get) (buffer, size) to fetch more raw input; a
   return value of 0 is treated as end of input.  Scrubbed text is
   stored in the TOLEN bytes starting at TOSTART, and the number of
   bytes stored is returned.  0 is returned at once when there is no
   saved input and GET reports end of input.

   When the output buffer fills, the unread part of the input is
   remembered in saved_input/saved_input_len, and the scrubber state is
   kept in file-level variables, so the next call carries on where
   this one stopped.

   The value of `state' drives the loop.  As far as this function
   shows:
     -1  emit the text at out_string, then switch to old_state
     -2  discard a slash-star comment (newlines are passed through),
         then switch to old_state
      0  at the start of a line
      1  leading whitespace on the line has been seen
      2  a non-white character was seen in state 1
      3  whitespace was seen in state 2
      4  "\t.appline " was emitted; copy the digits that follow
      5  inside a string; old_state is resumed at the closing quote
      6  just after a backslash inside a string
      7  "\n\t.appfile " was emitted; copy the opening quote
      8  discard the rest of the line
      9  a symbol character was seen in state 3
     10  whitespace was seen in state 9
     11  a non-white character was seen in state 0
     13  first '|' of "||" was emitted (DOUBLEBAR_PARALLEL only)
  14,15  inside parentheses entered from state 0 or 1 (TC_IA64 only)  */
int
do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
{
  char *to = tostart;
  char *toend = tostart + tolen;
  char *from;
  char *fromend;
  int fromlen;
  int ch, ch2 = 0;

/* Fetch the next input character, refilling input_buffer through GET
   when it is exhausted; yields EOF when GET returns 0.  */
#define GET() \
  (from < fromend \
   ? * (unsigned char *) (from++) \
   : (saved_input = NULL, \
      fromlen = (*get) (input_buffer, sizeof input_buffer), \
      from = input_buffer, \
      fromend = from + fromlen, \
      (fromlen == 0 \
       ? EOF \
       : * (unsigned char *) (from++))))

/* Push UCH back.  This writes to from[-1], so it is only valid when a
   character has been taken from the current buffer; it must not be
   used straight after GET has returned EOF.  */
#define UNGET(uch) (*--from = (uch))

/* Store PCH and leave through `tofull' if that filled the output.  The
   store comes before the check, so there must be room for one
   character whenever PUT is used.  */
#define PUT(pch) \
  do \
    { \
      *to++ = (pch); \
      if (to >= toend) \
        goto tofull; \
    } \
  while (0)

  if (saved_input != NULL)
    {
      from = saved_input;
      fromend = from + saved_input_len;
    }
  else
    {
      fromlen = (*get) (input_buffer, sizeof input_buffer);
      if (fromlen == 0)
        return 0;
      from = input_buffer;
      fromend = from + fromlen;
    }

  while (1)
    {
      switch (state)
        {
        case -1:
          /* Emit pending text one character per iteration.  */
          ch = *out_string++;
          if (*out_string == '\0')
            {
              state = old_state;
              old_state = 3;
            }
          PUT (ch);
          continue;

        case -2:
          /* Skip to the end of a slash-star comment.  */
          for (;;)
            {
              do
                {
                  ch = GET ();
                  if (ch == EOF)
                    {
                      as_warn (_("end of file in comment"));
                      goto fromeof;
                    }
                  if (ch == '\n')
                    PUT ('\n');
                }
              while (ch != '*');

              while ((ch = GET ()) == '*')
                ;

              if (ch == EOF)
                {
                  as_warn (_("end of file in comment"));
                  goto fromeof;
                }
              if (ch == '/')
                break;
              UNGET (ch);
            }
          state = old_state;
          /* The comment is replaced by a single space.  */
          UNGET (' ');
          continue;

        case 4:
          ch = GET ();
          if (ch == EOF)
            goto fromeof;
          else if (ch >= '0' && ch <= '9')
            PUT (ch);
          else
            {
              while (ch != EOF && IS_WHITESPACE (ch))
                ch = GET ();
              if (ch == '"')
                {
                  UNGET (ch);
                  if (scrub_m68k_mri)
                    out_string = "\n\tappfile ";
                  else
                    out_string = "\n\t.appfile ";
                  old_state = 7;
                  state = -1;
                  PUT (*out_string++);
                }
              else
                {
                  while (ch != EOF && ch != '\n')
                    ch = GET ();
                  state = 0;
                  /* Never store EOF itself in the output; end the
                     line with a newline instead.  */
                  PUT (ch == EOF ? '\n' : ch);
                }
            }
          continue;

        case 5:
          /* Copy as much ordinary string text as possible in one go.  */
          {
            char *s;
            int len;

            for (s = from; s < fromend; s++)
              {
                ch = *s;
                if (ch == '\\'
                    || ch == '"'
                    || ch == '\''
                    || ch == '\n')
                  break;
              }
            len = s - from;
            if (len > toend - to)
              len = toend - to;
            if (len > 0)
              {
                memcpy (to, from, len);
                to += len;
                from += len;
                /* The copy may have used the last free byte; the PUTs
                   below need room for one more character.  */
                if (to >= toend)
                  goto tofull;
              }
          }

          ch = GET ();
          if (ch == EOF)
            {
              /* GET has just reset FROM to the start of input_buffer,
                 so an UNGET there would write in front of the buffer.
                 Point the input at this one-byte buffer instead so
                 the newline can be pushed back safely.  */
              static char one_char_buf[1];

              as_warn (_("end of file in string; inserted '\"'"));
              state = old_state;
              from = fromend = one_char_buf + 1;
              UNGET ('\n');
              PUT ('"');
            }
          else if (lex[ch] == LEX_IS_STRINGQUOTE)
            {
              state = old_state;
              PUT (ch);
            }
#ifndef NO_STRING_ESCAPES
          else if (ch == '\\')
            {
              state = 6;
              PUT (ch);
            }
#endif
          else if (scrub_m68k_mri && ch == '\n')
            {
              /* In MRI mode a newline ends the string.  */
              state = old_state;
              UNGET (ch);
              PUT ('\'');
            }
          else
            {
              PUT (ch);
            }
          continue;

        case 6:
          state = 5;
          ch = GET ();
          switch (ch)
            {
            case '\n':
              /* Backslash-newline becomes the two characters "\n";
                 the swallowed line end is owed to the output.  */
              UNGET ('n');
              add_newlines++;
              PUT ('\\');
              continue;

            case EOF:
              as_warn (_("end of file in string; '\"' inserted"));
              PUT ('"');
              continue;

            case '"':
            case '\\':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case 'x':
            case 'X':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
              break;

            default:
#ifdef ONLY_STANDARD_ESCAPES
              as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
#endif
              break;
            }
          PUT (ch);
          continue;

        case 7:
          ch = GET ();
          state = 5;
          old_state = 8;
          if (ch == EOF)
            goto fromeof;
          PUT (ch);
          continue;

        case 8:
          do
            ch = GET ();
          while (ch != '\n' && ch != EOF);
          if (ch == EOF)
            goto fromeof;
          state = 0;
          PUT (ch);
          continue;

#ifdef DOUBLEBAR_PARALLEL
        case 13:
          ch = GET ();
          if (ch != '|')
            abort ();
          state = 1;
          PUT ('|');
          continue;
#endif
        }

      /* Every other state handles one input character below.  */
      ch = GET ();

#ifdef TC_IA64
      if (ch == '(' && (state == 0 || state == 1))
        {
          state += 14;
          PUT (ch);
          continue;
        }
      else if (state == 14 || state == 15)
        {
          if (ch == ')')
            {
              state -= 14;
              PUT (ch);
              ch = GET ();
            }
          else
            {
              PUT (ch);
              continue;
            }
        }
#endif

    recycle:

#if defined TC_ARM && defined OBJ_ELF
      /* Track a ".symver" directive; while one has been fully matched,
         comment-start characters are passed through (see the
         LEX_IS_COMMENT_START case) until the next newline.  */
      if (symver_state == NULL)
        {
          if ((state == 0 || state == 1) && ch == symver_pseudo[0])
            symver_state = symver_pseudo + 1;
        }
      else
        {
          if (ch != '\0' && (*symver_state == ch))
            ++symver_state;
          else if (*symver_state != '\0')
            symver_state = NULL;
          else
            {
              if (IS_NEWLINE (ch))
                symver_state = NULL;
            }
        }
#endif

#ifdef TC_M68K
      /* Track a ".mri 0" / ".mri 1" directive and rebuild the lex
         table through do_scrub_begin once one has been matched.  */
      if (mri_state == NULL)
        {
          if ((state == 0 || state == 1)
              && ch == mri_pseudo[0])
            mri_state = mri_pseudo + 1;
        }
      else
        {
          if (ch != '\0'
              && (*mri_state == ch
                  || (*mri_state == ' '
                      && lex[ch] == LEX_IS_WHITESPACE)
                  || (*mri_state == '0'
                      && ch == '1')))
            {
              mri_last_ch = ch;
              ++mri_state;
            }
          else if (*mri_state != '\0'
                   || (lex[ch] != LEX_IS_WHITESPACE
                       && lex[ch] != LEX_IS_NEWLINE))
            {
              mri_state = NULL;
            }
          else
            {
              do_scrub_begin (mri_last_ch == '1');
              mri_state = NULL;
            }
        }
#endif

      if (ch == EOF)
        {
          if (state != 0)
            {
              as_warn (_("end of file not at end of a line; newline inserted"));
              state = 0;
              PUT ('\n');
            }
          goto fromeof;
        }

      switch (lex[ch])
        {
        case LEX_IS_WHITESPACE:
          /* Collapse the run; CH becomes the first non-white
             character after it.  */
          do
            {
              ch = GET ();
            }
          while (ch != EOF && IS_WHITESPACE (ch));
          if (ch == EOF)
            goto fromeof;

          if (state == 0)
            {
              state = 1;
              UNGET (ch);
              PUT (' ');
              break;
            }

#ifdef KEEP_WHITE_AROUND_COLON
          if (lex[ch] == LEX_IS_COLON)
            {
              /* Keep this space only if the colon is not followed by
                 whitespace.  At EOF there is nothing to push back and
                 nothing to classify, so the colon is then handled by
                 the ordinary code below.  */
              ch2 = GET ();
              if (ch2 != EOF)
                {
                  UNGET (ch2);
                  if (!IS_WHITESPACE (ch2))
                    {
                      state = 9;
                      UNGET (ch);
                      PUT (' ');
                      break;
                    }
                }
            }
#endif

          if (IS_COMMENT (ch)
              || ch == '/'
              || IS_LINE_SEPARATOR (ch)
              || IS_PARALLEL_SEPARATOR (ch))
            {
              if (scrub_m68k_mri)
                {
                  UNGET (ch);
                  PUT (' ');
                  break;
                }
              /* Drop the whitespace and process CH directly.  */
              goto recycle;
            }

          if ((state == 2 || state == 11)
              && lex[ch] == LEX_IS_COLON
              && ! scrub_m68k_mri)
            {
              state = 1;
              PUT (ch);
              break;
            }

          switch (state)
            {
            case 0:
              state++;
              goto recycle;
            case 1:
              goto recycle;
            case 2:
              state = 3;
              if (to + 1 < toend)
                {
                  /* Room for the space and at least one more
                     character: emit and carry on with CH.  */
                  PUT (' ');
                  goto recycle;
                }
              UNGET (ch);
              PUT (' ');
              break;
            case 3:
              if (scrub_m68k_mri)
                {
                  UNGET (ch);
                  PUT (' ');
                  break;
                }
              goto recycle;
            case 9:
            case 10:
              if (scrub_m68k_mri)
                {
                  state = 3;
                  UNGET (ch);
                  PUT (' ');
                  break;
                }
              state = 10;
              goto recycle;
            case 11:
              if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
                state = 1;
              else
                {
                  state = 3;
                }
              UNGET (ch);
              PUT (' ');
              break;
            default:
              BAD_CASE (state);
            }
          break;

        case LEX_IS_TWOCHAR_COMMENT_1ST:
          ch2 = GET ();
          if (ch2 == '*')
            {
              /* Skip the comment, counting the newlines inside it.  */
              for (;;)
                {
                  do
                    {
                      ch2 = GET ();
                      if (ch2 != EOF && IS_NEWLINE (ch2))
                        add_newlines++;
                    }
                  while (ch2 != EOF && ch2 != '*');

                  while (ch2 == '*')
                    ch2 = GET ();

                  if (ch2 == EOF || ch2 == '/')
                    break;
                  UNGET (ch2);
                }
              if (ch2 == EOF)
                as_warn (_("end of file in multiline comment"));

              /* The comment counts as one space.  */
              ch = ' ';
              goto recycle;
            }
#ifdef DOUBLESLASH_LINE_COMMENTS
          else if (ch2 == '/')
            {
              do
                {
                  ch = GET ();
                }
              while (ch != EOF && !IS_NEWLINE (ch));
              if (ch == EOF)
                as_warn (_("end of file in comment; newline inserted"));
              state = 0;
              PUT ('\n');
              break;
            }
#endif
          else
            {
              if (ch2 != EOF)
                UNGET (ch2);
              if (state == 9 || state == 10)
                state = 3;
              PUT (ch);
            }
          break;

        case LEX_IS_STRINGQUOTE:
          if (state == 10)
            {
              /* Emit the whitespace that was skipped before the
                 quote.  CH is pushed back first so that it is not
                 lost if PUT leaves through `tofull'.  */
              UNGET (ch);
              state = 3;
              PUT (' ');
              ch = GET ();
              old_state = 3;
            }
          else if (state == 9)
            old_state = 3;
          else
            old_state = state;
          state = 5;
          PUT (ch);
          break;

#ifndef IEEE_STYLE
        case LEX_IS_ONECHAR_QUOTE:
          if (state == 10)
            {
              UNGET (ch);
              state = 3;
              PUT (' ');
              break;
            }
          ch = GET ();
          if (ch == EOF)
            {
              as_warn (_("end of file after a one-character quote; \\0 inserted"));
              ch = 0;
            }
          if (ch == '\\')
            {
              ch = GET ();
              if (ch == EOF)
                {
                  as_warn (_("end of file in escape character"));
                  ch = '\\';
                }
              else
                ch = process_escape (ch);
            }
          /* The character constant is replaced by its decimal value.  */
          sprintf (out_buf, "%d", (int) (unsigned char) ch);

          if ((ch = GET ()) != '\'')
            {
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
              as_warn (_("missing close quote; (assumed)"));
#else
              if (ch != EOF)
                UNGET (ch);
#endif
            }
          if (strlen (out_buf) == 1)
            {
              PUT (out_buf[0]);
              break;
            }
          if (state == 9)
            old_state = 3;
          else
            old_state = state;
          state = -1;
          out_string = out_buf;
          PUT (*out_string++);
          break;
#endif

        case LEX_IS_COLON:
#ifdef KEEP_WHITE_AROUND_COLON
          state = 9;
#else
          if (state == 9 || state == 10)
            state = 3;
          else if (state != 3)
            state = 1;
#endif
          PUT (ch);
          break;

        case LEX_IS_NEWLINE:
          /* Pay back one owed newline by reading this one again.  */
          if (add_newlines)
            {
              --add_newlines;
              UNGET (ch);
            }
          /* Fall through.  */

        case LEX_IS_LINE_SEPARATOR:
          state = 0;
          PUT (ch);
          break;

        case LEX_IS_PARALLEL_SEPARATOR:
          state = 1;
          PUT (ch);
          break;

#ifdef TC_V850
        case LEX_IS_DOUBLEDASH_1ST:
          ch2 = GET ();
          if (ch2 != '-')
            {
              /* Nothing can be pushed back after EOF.  */
              if (ch2 != EOF)
                UNGET (ch2);
              goto de_fault;
            }
          /* "--" starts a comment running to the end of the line.  */
          do
            {
              ch = GET ();
            }
          while (ch != EOF && ch != '\n');
          if (ch == EOF)
            as_warn (_("end of file in comment; newline inserted"));
          state = 0;
          PUT ('\n');
          break;
#endif

#ifdef DOUBLEBAR_PARALLEL
        case LEX_IS_DOUBLEBAR_1ST:
          ch2 = GET ();
          /* Nothing can be pushed back after EOF.  */
          if (ch2 != EOF)
            UNGET (ch2);
          if (ch2 != '|')
            goto de_fault;
          /* Emit the first '|'; state 13 emits the second.  */
          state = 13;
          PUT ('|');
          break;
#endif

        case LEX_IS_LINE_COMMENT_START:
          if (ch == '/')
            {
              ch2 = GET ();
              if (ch2 == '*')
                {
                  old_state = 3;
                  state = -2;
                  break;
                }
              else if (ch2 != EOF)
                {
                  /* Nothing can be pushed back after EOF.  */
                  UNGET (ch2);
                }
            }

          if (state == 0 || state == 1)
            {
              int startch;

              startch = ch;

              do
                {
                  ch = GET ();
                }
              while (ch != EOF && IS_WHITESPACE (ch));

              if (ch == EOF)
                {
                  as_warn (_("end of file in comment; newline inserted"));
                  PUT ('\n');
                  break;
                }

              /* Only '#' in state 0 followed by a digit is turned
                 into an appline directive; anything else is a comment
                 to the end of the line.  */
              if (ch < '0' || ch > '9' || state != 0 || startch != '#')
                {
                  while (ch != EOF && !IS_NEWLINE (ch))
                    ch = GET ();
                  if (ch == EOF)
                    as_warn (_("end of file in comment; newline inserted"));
                  state = 0;
                  PUT ('\n');
                  break;
                }

              UNGET (ch);
              old_state = 4;
              state = -1;
              if (scrub_m68k_mri)
                out_string = "\tappline ";
              else
                out_string = "\t.appline ";
              PUT (*out_string++);
              break;
            }

#ifdef TC_D10V
          if (state == 10)
            PUT (' ');
#endif
          /* Elsewhere on the line the character only starts a comment
             if it is also an ordinary comment character (or '!' / '*'
             in MRI mode).  */
          if (strchr (tc_comment_chars, ch) == NULL
              && (! scrub_m68k_mri
                  || (ch != '!' && ch != '*')))
            goto de_fault;
          if (scrub_m68k_mri
              && (ch == '!' || ch == '*' || ch == '#')
              && state != 1
              && state != 10)
            goto de_fault;
          /* Fall through.  */

        case LEX_IS_COMMENT_START:
#if defined TC_ARM && defined OBJ_ELF
          /* After a complete ".symver" the character is ordinary
             text, not a comment.  */
          if ((symver_state != NULL) && (*symver_state == 0))
            goto de_fault;
#endif
#ifdef WARN_COMMENTS
          if (!found_comment)
            as_where (&found_comment_file, &found_comment);
#endif
          do
            {
              ch = GET ();
            }
          while (ch != EOF && !IS_NEWLINE (ch));
          if (ch == EOF)
            as_warn (_("end of file in comment; newline inserted"));
          state = 0;
          PUT ('\n');
          break;

        case LEX_IS_SYMBOL_COMPONENT:
          if (state == 10)
            {
              /* Emit the whitespace skipped between two symbols.  */
              UNGET (ch);
              state = 3;
              PUT (' ');
              break;
            }

          if (state == 3)
            state = 9;

          /* Fast path: copy CH and the run of symbol / unclassified
             characters after it in one go.  It is skipped while a
             directive matcher is active because that needs to see
             every character.  */
          if (to + 1 < toend
              && mri_state == NULL
#if defined TC_ARM && defined OBJ_ELF
              && symver_state == NULL
#endif
              )
            {
              char *s;
              int len;

              for (s = from; s < fromend; s++)
                {
                  int type;

                  ch2 = *(unsigned char *) s;
                  type = lex[ch2];
                  if (type != 0
                      && type != LEX_IS_SYMBOL_COMPONENT)
                    break;
                }

              /* Leave the last character of the run to the ordinary
                 code below, which also updates the state.  This
                 guarantees the GET after the copy reads from the
                 current buffer.  */
              if (s > from)
                --s;

              len = s - from;
              if (len > (toend - to) - 1)
                len = (toend - to) - 1;

              if (len > 0)
                {
                  PUT (ch);
                  memcpy (to, from, len);
                  to += len;
                  from += len;
                  /* The copy may have used the last free byte; the
                     PUT at de_fault needs room for one more.  The rest
                     of the run is still unread, so the state is
                     updated when the scrubber resumes.  */
                  if (to >= toend)
                    goto tofull;
                  ch = GET ();
                }
            }
          /* Fall through.  */

        default:
        de_fault:
          if (state == 0)
            {
              state = 11;
            }
          else if (state == 1)
            {
              state = 2;
            }
          else if (state == 9)
            {
              if (!IS_SYMBOL_COMPONENT (ch))
                state = 3;
            }
          else if (state == 10)
            {
              if (ch == '\\')
                {
                  PUT (' ');
                }
              state = 3;
            }
          PUT (ch);
          break;
        }
    }

 fromeof:
  /* End of input: everything read has been handled.  */
  return to - tostart;

 tofull:
  /* Output buffer full: remember the unread input for the next call.  */
  if (fromend > from)
    {
      saved_input = from;
      saved_input_len = fromend - from;
    }
  else
    saved_input = NULL;

  return to - tostart;
}