#include "config.h"
#include "system.h"
#include "cpplib.h"
#include "internal.h"
/* Without a usable iconv, stub out the iconv entry points: iconv_open
   and iconv always fail with errno set to EINVAL, and iconv_close does
   nothing.  ICONV_CONST expands to nothing in that configuration.  */
#if !HAVE_ICONV
#define iconv_open(x, y) (errno = EINVAL, (iconv_t)-1)
#define iconv(a,b,c,d,e) (errno = EINVAL, (size_t)-1)
#define iconv_close(x) (void)0
#define ICONV_CONST
#endif
/* SOURCE_CHARSET names the internal (source) character set for the
   host, and LAST_POSSIBLY_BASIC_SOURCE_CHAR is the upper bound that
   cpp_host_to_exec_charset uses to reject characters that cannot be in
   the basic source character set.  */
#if HOST_CHARSET == HOST_CHARSET_ASCII
#define SOURCE_CHARSET "UTF-8"
#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e
#elif HOST_CHARSET == HOST_CHARSET_EBCDIC
#define SOURCE_CHARSET "UTF-EBCDIC"
#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF
#else
#error "Unrecognized basic host character set"
#endif
/* Hosts whose <errno.h> lacks EILSEQ report invalid sequences as
   EINVAL instead.  */
#ifndef EILSEQ
#define EILSEQ EINVAL
#endif
/* Growable byte buffer that the conversion routines append to.  */
struct _cpp_strbuf
{
/* Start of the storage; the converters enlarge it with xrealloc.  */
uchar *text;
/* Number of bytes currently allocated for TEXT.  */
size_t asize;
/* Number of bytes of TEXT already filled; new output goes at
   TEXT + LEN.  */
size_t len;
};
/* Number of bytes by which the conversion loops enlarge a full output
   buffer each time a converter reports E2BIG.  */
#define OUTBUF_BLOCK_SIZE 256
/* Decode a single UTF-8 sequence into a code point.

   *INBUFP points at the first byte of the sequence and *INBYTESLEFTP
   holds the number of bytes available.  On success the code point is
   stored in *CP, *INBUFP is advanced past the sequence, *INBYTESLEFTP
   is reduced accordingly and 0 is returned.  On failure nothing is
   updated and an errno value is returned:

     EINVAL  the input is empty or ends in the middle of a sequence;
     EILSEQ  the lead byte or a continuation byte is malformed, the
	     sequence is not the shortest encoding of its value, or the
	     value is a surrogate (D800-DFFF) or exceeds 0x7FFFFFFF.

   Sequences of up to six bytes (the original 31-bit UTF-8 definition)
   are accepted.  */
static inline int
one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
		     cppchar_t *cp)
{
  /* masks[N-1] selects the payload bits of the lead byte of an N-byte
     sequence; patns[N-1] is the value of the remaining (marker) bits.
     A five-byte lead is 111110xx, so its payload mask is 0x03.  */
  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
  static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

  cppchar_t c;
  const uchar *inbuf = *inbufp;
  size_t nbytes, i;

  if (*inbytesleftp < 1)
    return EINVAL;

  /* Fast path: a single-byte (7-bit) character.  */
  c = *inbuf;
  if (c < 0x80)
    {
      *cp = c;
      *inbytesleftp -= 1;
      *inbufp += 1;
      return 0;
    }

  /* The number of leading 1-bits in the first byte gives the length
     of the sequence.  */
  for (nbytes = 2; nbytes < 7; nbytes++)
    if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
      goto found;
  return EILSEQ;
 found:

  if (*inbytesleftp < nbytes)
    return EINVAL;

  /* Accumulate six payload bits from each continuation byte (10xxxxxx).  */
  c = (c & masks[nbytes-1]);
  inbuf++;
  for (i = 1; i < nbytes; i++)
    {
      cppchar_t n = *inbuf++;
      if ((n & 0xC0) != 0x80)
	return EILSEQ;
      c = ((c << 6) + (n & 0x3F));
    }

  /* Reject over-long encodings: the shortest possible form must have
     been used.  */
  if (c <=      0x7F && nbytes > 1) return EILSEQ;
  if (c <=     0x7FF && nbytes > 2) return EILSEQ;
  if (c <=    0xFFFF && nbytes > 3) return EILSEQ;
  if (c <=  0x1FFFFF && nbytes > 4) return EILSEQ;
  if (c <= 0x3FFFFFF && nbytes > 5) return EILSEQ;

  /* Reject out-of-range values and UTF-16 surrogates.  */
  if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF)) return EILSEQ;

  *cp = c;
  *inbufp = inbuf;
  *inbytesleftp -= nbytes;
  return 0;
}
/* Encode the code point C as UTF-8.

   The bytes are written at *OUTBUFP, which has room for
   *OUTBYTESLEFTP bytes.  On success both are updated to reflect the
   bytes written and 0 is returned.  Returns E2BIG, leaving the output
   untouched, if the encoding does not fit, and EILSEQ if C is larger
   than 0x7FFFFFFF and therefore has no encoding of six bytes or
   fewer.  */
static inline int
one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
{
  /* masks[N-1] is the marker OR'd into the lead byte of an N-byte
     sequence; limits[N-1] has a bit set wherever the remaining value
     would collide with that marker.  */
  static const uchar masks[6] =  { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
  static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
  size_t nbytes;
  uchar buf[6], *p = &buf[6];
  uchar *outbuf = *outbufp;

  /* Values needing more than 31 bits would take a seventh byte, which
     would index past the tables above and write before BUF.  */
  if (c > 0x7FFFFFFF)
    return EILSEQ;

  nbytes = 1;
  if (c < 0x80)
    *--p = c;
  else
    {
      /* Build the sequence backwards: peel off six bits at a time as
	 continuation bytes until what is left fits in a lead byte.  */
      do
	{
	  *--p = ((c & 0x3F) | 0x80);
	  c >>= 6;
	  nbytes++;
	}
      while (c >= 0x3F || (c & limits[nbytes-1]));
      *--p = (c | masks[nbytes-1]);
    }

  if (*outbytesleftp < nbytes)
    return E2BIG;

  while (p < &buf[6])
    *outbuf++ = *p++;
  *outbytesleftp -= nbytes;
  *outbufp = outbuf;
  return 0;
}
/* Convert one UTF-8 sequence at *INBUFP into one UTF-32 code unit at
   *OUTBUFP.  BIGEND is used purely as a flag: nonzero selects
   big-endian byte order, zero little-endian.  Returns 0 on success,
   E2BIG when fewer than four output bytes remain (checked before any
   input is consumed), or whatever one_utf8_to_cppchar reports.  */
static inline int
one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
		   uchar **outbufp, size_t *outbytesleftp)
{
  cppchar_t ch = 0;
  uchar *dst;
  size_t k;
  int status;

  if (*outbytesleftp < 4)
    return E2BIG;

  status = one_utf8_to_cppchar (inbufp, inbytesleftp, &ch);
  if (status != 0)
    return status;

  /* Byte K (counting from the least significant) lands at offset K for
     little-endian output and at offset 3 - K for big-endian.  */
  dst = *outbufp;
  for (k = 0; k < 4; k++)
    dst[bigend ? 3 - k : k] = (ch >> (8 * k)) & 0xFF;

  *outbufp = dst + 4;
  *outbytesleftp -= 4;
  return 0;
}
/* Convert one UTF-32 code unit at *INBUFP into UTF-8 at *OUTBUFP.
   BIGEND is used purely as a flag: nonzero means the input is
   big-endian, zero little-endian.

   Returns 0 on success, after advancing both input and output.
   Returns EINVAL if fewer than four input bytes remain, EILSEQ if the
   value is a surrogate (D800-DFFF) or is 0x7FFFFFFF or above, or the
   error from one_cppchar_to_utf8 (E2BIG when the output is full); in
   all failure cases the input position is left unchanged.  */
static inline int
one_utf32_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
		   uchar **outbufp, size_t *outbytesleftp)
{
  cppchar_t s;
  int rval;
  const uchar *inbuf;

  if (*inbytesleftp < 4)
    return EINVAL;

  /* Widen each byte to cppchar_t before shifting: shifting the
     promoted (signed) int left by 24 overflows for bytes >= 0x80.  */
  inbuf = *inbufp;
  s  = (cppchar_t) inbuf[bigend ? 0 : 3] << 24;
  s += (cppchar_t) inbuf[bigend ? 1 : 2] << 16;
  s += (cppchar_t) inbuf[bigend ? 2 : 1] << 8;
  s += (cppchar_t) inbuf[bigend ? 3 : 0];

  if (s >= 0x7FFFFFFF || (s >= 0xD800 && s <= 0xDFFF))
    return EILSEQ;

  rval = one_cppchar_to_utf8 (s, outbufp, outbytesleftp);
  if (rval)
    return rval;

  /* Only consume the input once the output has been written.  */
  *inbufp += 4;
  *inbytesleftp -= 4;
  return 0;
}
/* Convert one UTF-8 sequence at *INBUFP into UTF-16 at *OUTBUFP.
   BIGEND is used purely as a flag: nonzero selects big-endian output,
   zero little-endian.

   Code points up to and including U+FFFF become a single 16-bit unit;
   larger ones (up to U+10FFFF) become a surrogate pair.  Returns 0 on
   success.  On failure the input position is left as it was on entry
   and an errno value is returned: the error from one_utf8_to_cppchar,
   EILSEQ if the code point is above U+10FFFF, or E2BIG if the output
   does not have room for the unit(s).  */
static inline int
one_utf8_to_utf16 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
		   uchar **outbufp, size_t *outbytesleftp)
{
  int rval;
  cppchar_t s = 0;
  const uchar *save_inbuf = *inbufp;
  size_t save_inbytesleft = *inbytesleftp;
  uchar *outbuf = *outbufp;

  rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
  if (rval)
    return rval;

  /* UTF-16 cannot represent anything beyond U+10FFFF.  */
  if (s > 0x0010FFFF)
    {
      *inbufp = save_inbuf;
      *inbytesleftp = save_inbytesleft;
      return EILSEQ;
    }

  /* U+FFFF itself still fits in one unit; only U+10000 and above need
     surrogates (S - 0x10000 would wrap around for anything smaller).  */
  if (s <= 0xFFFF)
    {
      if (*outbytesleftp < 2)
	{
	  *inbufp = save_inbuf;
	  *inbytesleftp = save_inbytesleft;
	  return E2BIG;
	}
      outbuf[bigend ? 1 : 0] = (s & 0x00FF);
      outbuf[bigend ? 0 : 1] = (s & 0xFF00) >> 8;

      *outbufp += 2;
      *outbytesleftp -= 2;
      return 0;
    }
  else
    {
      cppchar_t hi, lo;

      if (*outbytesleftp < 4)
	{
	  *inbufp = save_inbuf;
	  *inbytesleftp = save_inbytesleft;
	  return E2BIG;
	}

      /* Split the 20-bit offset into high and low surrogates.  */
      hi = (s - 0x10000) / 0x400 + 0xD800;
      lo = (s - 0x10000) % 0x400 + 0xDC00;

      outbuf[bigend ? 1 : 0] = (hi & 0x00FF);
      outbuf[bigend ? 0 : 1] = (hi & 0xFF00) >> 8;
      outbuf[bigend ? 3 : 2] = (lo & 0x00FF);
      outbuf[bigend ? 2 : 3] = (lo & 0xFF00) >> 8;

      *outbufp += 4;
      *outbytesleftp -= 4;
      return 0;
    }
}
/* Convert one UTF-16 character (a single unit or a surrogate pair) at
   *INBUFP into UTF-8 at *OUTBUFP.  BIGEND is used purely as a flag:
   nonzero means the input is big-endian, zero little-endian.

   Returns 0 on success after advancing input and output.  Returns
   EINVAL if the input is too short for the unit or pair, EILSEQ for a
   lone low surrogate or a high surrogate not followed by a low one, or
   the error from one_cppchar_to_utf8.  The input position is only
   advanced on success.  */
static inline int
one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
		   uchar **outbufp, size_t *outbytesleftp)
{
  const uchar *src = *inbufp;
  cppchar_t lead, ch;
  size_t consumed = 2;
  int status;

  if (*inbytesleftp < 2)
    return EINVAL;

  lead = src[bigend ? 0 : 1] << 8;
  lead += src[bigend ? 1 : 0];
  ch = lead;

  /* A low surrogate may not appear without a preceding high one.  */
  if (lead >= 0xDC00 && lead <= 0xDFFF)
    return EILSEQ;

  /* A high surrogate must be followed by a low surrogate.  */
  if (lead >= 0xD800 && lead <= 0xDBFF)
    {
      cppchar_t trail;

      if (*inbytesleftp < 4)
	return EINVAL;

      trail = src[bigend ? 2 : 3] << 8;
      trail += src[bigend ? 3 : 2];
      if (trail < 0xDC00 || trail > 0xDFFF)
	return EILSEQ;

      ch = (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000;
      consumed = 4;
    }

  status = one_cppchar_to_utf8 (ch, outbufp, outbytesleftp);
  if (status)
    return status;

  /* The output pointers were updated by one_cppchar_to_utf8; now
     account for the input that was used.  */
  *inbufp += consumed;
  *inbytesleftp -= consumed;
  return 0;
}
/* Drive ONE_CONVERSION over the FLEN bytes at FROM, appending the
   result to TO and enlarging TO by OUTBUF_BLOCK_SIZE whenever the
   converter reports E2BIG.  CD is passed through to ONE_CONVERSION
   unchanged.  Returns true once all input has been consumed (updating
   TO->len); on any error other than E2BIG, stores the error in errno
   and returns false.  */
static inline bool
conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
					     uchar **, size_t *),
		 iconv_t cd, const uchar *from, size_t flen, struct _cpp_strbuf *to)
{
  const uchar *src = from;
  size_t src_left = flen;
  uchar *dst = to->text + to->len;
  size_t dst_left = to->asize - to->len;
  int status;

  while (1)
    {
      /* Convert characters until the input runs out or one fails.  */
      do
	status = one_conversion (cd, &src, &src_left, &dst, &dst_left);
      while (status == 0 && src_left != 0);

      if (__builtin_expect (src_left == 0, 1))
	break;

      if (status != E2BIG)
	{
	  errno = status;
	  return false;
	}

      /* Out of room: grow the buffer and recompute the write position,
	 since xrealloc may have moved the storage.  */
      to->asize += OUTBUF_BLOCK_SIZE;
      dst_left += OUTBUF_BLOCK_SIZE;
      to->text = xrealloc (to->text, to->asize);
      dst = to->text + to->asize - dst_left;
    }

  to->len = to->asize - dst_left;
  return true;
}
/* Convert the FLEN bytes of UTF-8 at FROM to UTF-16, appending to TO.
   CD is not a real iconv descriptor here: it is forwarded to
   one_utf8_to_utf16 as the big-endian flag.  Returns the result of
   conversion_loop.  */
static bool
convert_utf8_utf16 (iconv_t cd, const uchar *from, size_t flen,
struct _cpp_strbuf *to)
{
return conversion_loop (one_utf8_to_utf16, cd, from, flen, to);
}
/* Convert the FLEN bytes of UTF-8 at FROM to UTF-32, appending to TO.
   CD is not a real iconv descriptor here: it is forwarded to
   one_utf8_to_utf32 as the big-endian flag.  Returns the result of
   conversion_loop.  */
static bool
convert_utf8_utf32 (iconv_t cd, const uchar *from, size_t flen,
struct _cpp_strbuf *to)
{
return conversion_loop (one_utf8_to_utf32, cd, from, flen, to);
}
/* Convert the FLEN bytes of UTF-16 at FROM to UTF-8, appending to TO.
   CD is not a real iconv descriptor here: it is forwarded to
   one_utf16_to_utf8 as the big-endian flag.  Returns the result of
   conversion_loop.  */
static bool
convert_utf16_utf8 (iconv_t cd, const uchar *from, size_t flen,
struct _cpp_strbuf *to)
{
return conversion_loop (one_utf16_to_utf8, cd, from, flen, to);
}
/* Convert the FLEN bytes of UTF-32 at FROM to UTF-8, appending to TO.
   CD is not a real iconv descriptor here: it is forwarded to
   one_utf32_to_utf8 as the big-endian flag.  Returns the result of
   conversion_loop.  */
static bool
convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
struct _cpp_strbuf *to)
{
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
/* Identity "conversion": append the FLEN bytes at FROM to TO verbatim,
   enlarging TO to exactly the size required if it is too small.  CD is
   ignored.  Always returns true.  */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
		       const uchar *from, size_t flen, struct _cpp_strbuf *to)
{
  size_t needed = to->len + flen;

  if (needed > to->asize)
    {
      to->asize = needed;
      to->text = xrealloc (to->text, needed);
    }

  memcpy (&to->text[to->len], from, flen);
  to->len = needed;
  return true;
}
#if HAVE_ICONV
/* Convert the FLEN bytes at FROM using the iconv descriptor CD,
   appending the result to TO and enlarging TO by OUTBUF_BLOCK_SIZE each
   time iconv reports E2BIG.  The descriptor is reset to its initial
   state first.  Returns true when all input has been consumed
   (updating TO->len); returns false, with errno as set by iconv, if
   the reset fails or iconv stops for any reason other than E2BIG.  */
static bool
convert_using_iconv (iconv_t cd, const uchar *from, size_t flen,
		     struct _cpp_strbuf *to)
{
  ICONV_CONST char *src = (ICONV_CONST char *) from;
  size_t src_left = flen;
  char *dst;
  size_t dst_left;

  /* Reset the descriptor; this also detects an invalid one.  */
  if (iconv (cd, 0, 0, 0, 0) == (size_t) -1)
    return false;

  dst = (char *) to->text + to->len;
  dst_left = to->asize - to->len;

  while (1)
    {
      iconv (cd, &src, &src_left, &dst, &dst_left);
      if (__builtin_expect (src_left == 0, 1))
	break;

      if (errno != E2BIG)
	return false;

      /* Out of room: grow the buffer and recompute the write position,
	 since xrealloc may have moved the storage.  */
      to->asize += OUTBUF_BLOCK_SIZE;
      dst_left += OUTBUF_BLOCK_SIZE;
      to->text = xrealloc (to->text, to->asize);
      dst = (char *) to->text + to->asize - dst_left;
    }

  to->len = to->asize - dst_left;
  return true;
}
#else
/* No iconv: make the name a null constant so comparisons against it
   still compile.  */
#define convert_using_iconv 0
#endif
/* Invoke the conversion function stored in CONVERTER (a struct with
   func and cd members), passing its cd member plus FROM, FLEN and TO.
   CONVERTER is expanded twice, so it must not have side effects.  */
#define APPLY_CONVERSION(CONVERTER, FROM, FLEN, TO) \
CONVERTER.func (CONVERTER.cd, FROM, FLEN, TO)
/* One entry of the table of conversions implemented in this file
   without iconv.  */
struct conversion
{
/* "FROM/TO" character set names, matched case-insensitively by
   init_iconv_desc.  */
const char *pair;
/* Function that performs the conversion.  */
convert_f func;
/* Value stored as the converter's cd; the functions below treat it as
   a flag, nonzero meaning big-endian.  */
iconv_t fake_cd;
};
/* Built-in conversions between UTF-8 and the UTF-16/UTF-32 variants.
   init_iconv_desc consults this table before falling back to iconv.  */
static const struct conversion conversion_tab[] = {
{ "UTF-8/UTF-32LE", convert_utf8_utf32, (iconv_t)0 },
{ "UTF-8/UTF-32BE", convert_utf8_utf32, (iconv_t)1 },
{ "UTF-8/UTF-16LE", convert_utf8_utf16, (iconv_t)0 },
{ "UTF-8/UTF-16BE", convert_utf8_utf16, (iconv_t)1 },
{ "UTF-32LE/UTF-8", convert_utf32_utf8, (iconv_t)0 },
{ "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
{ "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
{ "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
};
/* Build a converter from character set FROM to character set TO.

   Selection order: identical names (ignoring case) get the identity
   converter; a "FROM/TO" pair listed in conversion_tab gets the
   built-in routine; otherwise iconv is tried when available.  If no
   conversion can be set up, an error is reported through PFILE and the
   identity converter is returned instead.  */
static struct cset_converter
init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
{
  struct cset_converter ret;
  const struct conversion *entry;
  const struct conversion *const tab_end
    = conversion_tab + ARRAY_SIZE (conversion_tab);
  char *pair;

  if (strcasecmp (to, from) == 0)
    {
      ret.func = convert_no_conversion;
      ret.cd = (iconv_t) -1;
      return ret;
    }

  /* Form "FROM/TO" (the extra two bytes are the slash and the NUL) and
     look for a built-in conversion.  */
  pair = alloca (strlen (to) + strlen (from) + 2);
  strcpy (pair, from);
  strcat (pair, "/");
  strcat (pair, to);

  for (entry = conversion_tab; entry != tab_end; entry++)
    if (strcasecmp (pair, entry->pair) == 0)
      {
	ret.func = entry->func;
	ret.cd = entry->fake_cd;
	return ret;
      }

  if (!HAVE_ICONV)
    {
      cpp_error (pfile, CPP_DL_ERROR,
		 "no iconv implementation, cannot convert from %s to %s",
		 from, to);
      ret.func = convert_no_conversion;
      ret.cd = (iconv_t) -1;
      return ret;
    }

  /* Anything else has to go through iconv.  */
  ret.func = convert_using_iconv;
  ret.cd = iconv_open (to, from);
  if (ret.cd == (iconv_t) -1)
    {
      if (errno == EINVAL)
	cpp_error (pfile, CPP_DL_ERROR,
		   "conversion from %s to %s not supported by iconv",
		   from, to);
      else
	cpp_errno (pfile, CPP_DL_ERROR, "iconv_open");

      ret.func = convert_no_conversion;
    }
  return ret;
}
/* Set up PFILE's narrow and wide execution character set converters
   from SOURCE_CHARSET.  The narrow_charset and wide_charset options
   are used when set.  Otherwise the narrow set defaults to
   SOURCE_CHARSET, and the wide set to UTF-32 or UTF-16 in the target
   byte order according to wchar_precision, or to SOURCE_CHARSET when
   wchar_t is narrower than 16 bits.  */
void
cpp_init_iconv (cpp_reader *pfile)
{
  const char *narrow = CPP_OPTION (pfile, narrow_charset);
  const char *wide = CPP_OPTION (pfile, wide_charset);
  bool big_endian = CPP_OPTION (pfile, bytes_big_endian);

  if (!narrow)
    narrow = SOURCE_CHARSET;

  if (!wide)
    {
      if (CPP_OPTION (pfile, wchar_precision) >= 32)
	wide = big_endian ? "UTF-32BE" : "UTF-32LE";
      else if (CPP_OPTION (pfile, wchar_precision) >= 16)
	wide = big_endian ? "UTF-16BE" : "UTF-16LE";
      else
	wide = SOURCE_CHARSET;
    }

  pfile->narrow_cset_desc = init_iconv_desc (pfile, narrow, SOURCE_CHARSET);
  pfile->wide_cset_desc = init_iconv_desc (pfile, wide, SOURCE_CHARSET);
}
/* Close whichever of PFILE's narrow and wide converters are backed by
   a real iconv descriptor.  Does nothing when iconv is unavailable.  */
void
_cpp_destroy_iconv (cpp_reader *pfile)
{
  if (!HAVE_ICONV)
    return;

  if (pfile->narrow_cset_desc.func == convert_using_iconv)
    iconv_close (pfile->narrow_cset_desc.cd);

  if (pfile->wide_cset_desc.func == convert_using_iconv)
    iconv_close (pfile->wide_cset_desc.cd);
}
/* Convert the single host character C to the narrow execution
   character set of PFILE and return the resulting byte.

   C must not exceed LAST_POSSIBLY_BASIC_SOURCE_CHAR and must convert to
   exactly one byte; otherwise an internal error is reported and 0 is
   returned.  The temporary conversion buffer is released on every
   path.  */
cppchar_t
cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
{
  uchar sbuf[1];
  struct _cpp_strbuf tbuf;
  cppchar_t result = 0;

  if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR)
    {
      cpp_error (pfile, CPP_DL_ICE,
		 "character 0x%lx is not in the basic source character set\n",
		 (unsigned long)c);
      return 0;
    }

  /* C fits in one byte, so a one-byte buffer holds it as a complete
     input string.  */
  sbuf[0] = c;

  /* One byte of output is expected; the converter enlarges the buffer
     itself if more is needed.  */
  tbuf.asize = 1;
  tbuf.text = xmalloc (tbuf.asize);
  tbuf.len = 0;

  if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
    cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
  else if (tbuf.len != 1)
    cpp_error (pfile, CPP_DL_ICE,
	       "character 0x%lx is not unibyte in execution character set",
	       (unsigned long)c);
  else
    result = tbuf.text[0];

  /* Freed only after any diagnostic, so that errno is still intact when
     cpp_errno reads it.  */
  free (tbuf.text);
  return result;
}
/* Return a mask with the low WIDTH bits set.  WIDTH is first clamped
   to BITS_PER_CPPCHAR_T; if the clamped width covers a whole size_t,
   the result is all ones (avoiding a shift by the full type width).  */
static inline size_t
width_to_mask (size_t width)
{
  size_t bits = MIN (width, BITS_PER_CPPCHAR_T);

  return (bits >= CHAR_BIT * sizeof (size_t)
	  ? ~(size_t) 0
	  : ((size_t) 1 << bits) - 1);
}
/* Flag bits stored in ucnranges[].flags and tested by
   ucn_valid_in_identifier.  */
enum {
/* Permitted in identifiers in C99.  */
C99 = 1,
/* With the c99 option set, makes ucn_valid_in_identifier return 2,
   which its caller reports as not valid at the start of an
   identifier.  */
DIG = 2,
/* Permitted in identifiers in C++.  */
CXX = 4,
/* Raises the normalization level to at least
   normalized_identifier_C.  */
CID = 8,
/* Raises the normalization level to at least normalized_C.  */
NFC = 16,
/* Leaves the normalization level unchanged.  */
NKC = 32,
/* Normalization effect depends on the preceding character.  */
CTX = 64
};
/* Table of code point ranges, searched by binary search on END.  Each
   entry describes the code points from just after the previous entry's
   END up to and including its own END.  The contents come from
   ucnid.h.  */
static const struct {
/* Combination of the flag bits above.  */
unsigned char flags;
/* Value compared against, and recorded as, normalize_state.prev_class;
   presumably the Unicode combining class -- TODO confirm against
   ucnid.h.  */
unsigned char combine;
/* Last code point covered by this entry.  */
unsigned short end;
} ucnranges[] = {
#include "ucnid.h"
};
/* Decide whether the code point C may appear in an identifier, and
   update the normalization state NST to account for it.

   Returns 0 if C is not valid in an identifier, 2 if the c99 option is
   set and C is flagged DIG, and 1 otherwise.  NST is only modified
   when the result is nonzero.  Code points above 0xFFFF are always
   rejected.  */
static int
ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
struct normalize_state *nst)
{
int mn, mx, md;
if (c > 0xFFFF)
return 0;
/* Binary search for the first range whose END is >= C.  */
mn = 0;
mx = ARRAY_SIZE (ucnranges) - 1;
while (mx != mn)
{
md = (mn + mx) / 2;
if (c <= ucnranges[md].end)
mx = md;
else
mn = md + 1;
}
/* The character must be allowed by at least one of C99 and C++...  */
if (! (ucnranges[mn].flags & (C99 | CXX)))
return 0;
/* ...and, when pedantic, by the language actually being compiled.  */
if (CPP_PEDANTIC (pfile)
&& ((CPP_OPTION (pfile, c99) && !(ucnranges[mn].flags & C99))
|| (CPP_OPTION (pfile, cplusplus)
&& !(ucnranges[mn].flags & CXX))))
return 0;
/* Update NST.  A nonzero combine value lower than that of the
   previous character means the sequence is not normalized at all.  */
if (ucnranges[mn].combine != 0 && ucnranges[mn].combine < nst->prev_class)
nst->level = normalized_none;
else if (ucnranges[mn].flags & CTX)
{
/* Context-dependent characters: SAFE is false when C follows a
   particular previous character P.  */
bool safe;
cppchar_t p = nst->previous;
if (c == 0x09BE)
safe = p != 0x09C7;
else if (c == 0x0B3E)
safe = p != 0x0B47;
else if (c == 0x0BBE)
safe = p != 0x0BC6 && p != 0x0BC7;
else if (c == 0x0CC2)
safe = p != 0x0CC6;
else if (c == 0x0D3E)
safe = p != 0x0D46 && p != 0x0D47;
/* 1161-1175 is unsafe after 1100-1112.  */
else if (c >= 0x1161 && c <= 0x1175)
safe = p < 0x1100 || p > 0x1112;
/* 11A8-11C2 is unsafe after a code point in AC00-D7A3 whose offset
   from AC00 is a multiple of 28.  */
else if (c >= 0x11A8 && c <= 0x11C2)
safe = (p < 0xAC00 || p > 0xD7A3 || (p - 0xAC00) % 28 != 0);
else
{
/* A CTX-flagged character that this function has no rule for.  */
cpp_error (pfile, CPP_DL_ICE, "Character %x might not be NFKC", c);
safe = true;
}
/* Unsafe characters below 1161 drop the level to none; the others
   only raise it to at least normalized_identifier_C.  */
if (!safe && c < 0x1161)
nst->level = normalized_none;
else if (!safe)
nst->level = MAX (nst->level, normalized_identifier_C);
}
else if (ucnranges[mn].flags & NKC)
;
else if (ucnranges[mn].flags & NFC)
nst->level = MAX (nst->level, normalized_C);
else if (ucnranges[mn].flags & CID)
nst->level = MAX (nst->level, normalized_identifier_C);
else
nst->level = normalized_none;
/* Remember this character for the checks on the next one.  */
nst->previous = c;
nst->prev_class = ucnranges[mn].combine;
if (CPP_OPTION (pfile, c99) && (ucnranges[mn].flags & DIG))
return 2;
return 1;
}
/* Parse and validate a universal character name.

   On entry *PSTR points just past the 'u' or 'U' (so (*PSTR)[-1] is
   that letter), and LIMIT bounds the hex digits that may be read.
   IDENTIFIER_POS is 0 outside identifiers, 1 for the first character
   of an identifier and any other nonzero value elsewhere in one.  NST
   is the normalization state, updated for UCNs in identifiers.

   Returns the code point.  If the UCN has too few digits and
   IDENTIFIER_POS is nonzero, returns 0 without touching *PSTR or
   issuing an error.  Otherwise *PSTR is advanced past the digits
   consumed; an incomplete or invalid UCN is diagnosed and yields 1,
   and a value of zero is likewise returned as 1.  */
cppchar_t
_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
const uchar *limit, int identifier_pos,
struct normalize_state *nst)
{
cppchar_t result, c;
unsigned int length;
const uchar *str = *pstr;
/* Start of the text quoted in diagnostics; presumably the backslash
   that introduces the UCN.  */
const uchar *base = str - 2;
if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
cpp_error (pfile, CPP_DL_WARNING,
"universal character names are only valid in C++ and C99");
else if (CPP_WTRADITIONAL (pfile) && identifier_pos == 0)
cpp_error (pfile, CPP_DL_WARNING,
"the meaning of '\\%c' is different in traditional C",
(int) str[-1]);
/* \u takes four hex digits, \U eight.  */
if (str[-1] == 'u')
length = 4;
else if (str[-1] == 'U')
length = 8;
else
{
cpp_error (pfile, CPP_DL_ICE, "In _cpp_valid_ucn but not a UCN");
length = 4;
}
/* Accumulate hex digits; LENGTH counts the digits still missing.
   Note the first digit is examined before STR is compared to LIMIT.  */
result = 0;
do
{
c = *str;
if (!ISXDIGIT (c))
break;
str++;
result = (result << 4) + hex_value (c);
}
while (--length && str < limit);
/* A short UCN inside an identifier is rejected silently, leaving
   *PSTR alone.  */
if (length && identifier_pos)
return 0;
*pstr = str;
if (length)
{
cpp_error (pfile, CPP_DL_ERROR,
"incomplete universal character name %.*s",
(int) (str - base), base);
result = 1;
}
/* Values below A0 are rejected except 24, 40 and 60 ('$', '@' and '`'
   in ASCII); so are values with the top bit set and surrogates.  */
else if ((result < 0xa0
&& (result != 0x24 && result != 0x40 && result != 0x60))
|| (result & 0x80000000)
|| (result >= 0xD800 && result <= 0xDFFF))
{
cpp_error (pfile, CPP_DL_ERROR,
"%.*s is not a valid universal character",
(int) (str - base), base);
result = 1;
}
/* A UCN spelling of '$' in an identifier: warn once (the warning
   switches itself off) when warn_dollars is set.  */
else if (identifier_pos && result == 0x24
&& CPP_OPTION (pfile, dollars_in_ident))
{
if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
{
CPP_OPTION (pfile, warn_dollars) = 0;
cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
}
NORMALIZE_STATE_UPDATE_IDNUM (nst);
}
else if (identifier_pos)
{
int validity = ucn_valid_in_identifier (pfile, result, nst);
if (validity == 0)
cpp_error (pfile, CPP_DL_ERROR,
"universal character %.*s is not valid in an identifier",
(int) (str - base), base);
else if (validity == 2 && identifier_pos == 1)
cpp_error (pfile, CPP_DL_ERROR,
"universal character %.*s is not valid at the start of an identifier",
(int) (str - base), base);
}
/* Never return 0 from here on; 0 is reserved for the silent rejection
   above.  */
if (result == 0)
result = 1;
return result;
}
/* Convert a UCN escape inside a string or character literal.  FROM
   points at the 'u' or 'U'; LIMIT bounds the text.  The code point is
   encoded as UTF-8 and then run through the narrow or wide execution
   character set converter (selected by WIDE), appending to TBUF.
   Conversion failures are reported through PFILE.  Returns a pointer
   just past the part of the escape that was consumed.  */
static const uchar *
convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
	     struct _cpp_strbuf *tbuf, bool wide)
{
  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
  struct cset_converter cvt;
  uchar utf8[6];
  uchar *out = utf8;
  size_t room = sizeof utf8;
  cppchar_t ucn;
  int status;

  if (wide)
    cvt = pfile->wide_cset_desc;
  else
    cvt = pfile->narrow_cset_desc;

  /* Step over the 'u' or 'U' and let _cpp_valid_ucn read the digits.  */
  from++;
  ucn = _cpp_valid_ucn (pfile, &from, limit, 0, &nst);

  status = one_cppchar_to_utf8 (ucn, &out, &room);
  if (status != 0)
    {
      errno = status;
      cpp_errno (pfile, CPP_DL_ERROR,
		 "converting UCN to source character set");
    }
  else if (!APPLY_CONVERSION (cvt, utf8, sizeof utf8 - room, tbuf))
    cpp_errno (pfile, CPP_DL_ERROR,
	       "converting UCN to execution character set");

  return from;
}
/* Append the numeric value N to TBUF as one execution character.  A
   narrow character is stored as a single byte.  A wide character is
   stored as wchar_precision / char_precision bytes, each holding
   char_precision bits of N, in the target byte order given by the
   bytes_big_endian option.  TBUF is enlarged by OUTBUF_BLOCK_SIZE if
   the character does not fit.  */
static void
emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
		     struct _cpp_strbuf *tbuf, bool wide)
{
  if (!wide)
    {
      if (tbuf->len + 1 > tbuf->asize)
	{
	  tbuf->asize += OUTBUF_BLOCK_SIZE;
	  tbuf->text = xrealloc (tbuf->text, tbuf->asize);
	}
      tbuf->text[tbuf->len++] = n;
    }
  else
    {
      bool bigend = CPP_OPTION (pfile, bytes_big_endian);
      size_t width = CPP_OPTION (pfile, wchar_precision);
      size_t cwidth = CPP_OPTION (pfile, char_precision);
      size_t cmask = width_to_mask (cwidth);
      size_t nbwc = width / cwidth;
      size_t off = tbuf->len;
      size_t i;

      if (off + nbwc > tbuf->asize)
	{
	  tbuf->asize += OUTBUF_BLOCK_SIZE;
	  tbuf->text = xrealloc (tbuf->text, tbuf->asize);
	}

      /* Emit the least significant unit first, placing it at the end
	 of the character for big-endian targets.  */
      for (i = 0; i < nbwc; i++)
	{
	  size_t pos = bigend ? nbwc - i - 1 : i;

	  tbuf->text[off + pos] = n & cmask;
	  n >>= cwidth;
	}
      tbuf->len += nbwc;
    }
}
/* Convert a hexadecimal escape.  FROM points at the 'x'; digits are
   read up to LIMIT.  The value is appended to TBUF as one narrow or
   wide character according to WIDE.  A value that does not fit the
   character width draws a pedwarn and is truncated; an escape with no
   digits is an error and emits nothing.  Returns a pointer just past
   the last digit consumed.  */
static const uchar *
convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
	     struct _cpp_strbuf *tbuf, bool wide)
{
  size_t width = (wide ? CPP_OPTION (pfile, wchar_precision)
		  : CPP_OPTION (pfile, char_precision));
  size_t mask = width_to_mask (width);
  cppchar_t digit, value = 0, overflow = 0;
  bool seen_digit = false;

  if (CPP_WTRADITIONAL (pfile))
    cpp_error (pfile, CPP_DL_WARNING,
	       "the meaning of '\\x' is different in traditional C");

  /* Skip the 'x', then accumulate digits.  OVERFLOW collects any bits
     that the next shift would push off the top of VALUE.  */
  for (from++; from < limit; from++)
    {
      digit = *from;
      if (! hex_p (digit))
	break;
      overflow |= value ^ (value << 4 >> 4);
      value = (value << 4) + hex_value (digit);
      seen_digit = true;
    }

  if (!seen_digit)
    {
      cpp_error (pfile, CPP_DL_ERROR,
		 "\\x used with no following hex digits");
      return from;
    }

  if (overflow != 0 || value != (value & mask))
    {
      cpp_error (pfile, CPP_DL_PEDWARN,
		 "hex escape sequence out of range");
      value &= mask;
    }

  emit_numeric_escape (pfile, value, tbuf, wide);
  return from;
}
/* Convert an octal escape.  FROM points at the first digit; at most
   three digits are read, stopping at LIMIT.  The value is appended to
   TBUF as one narrow or wide character according to WIDE.  A value
   that does not fit the character width draws a pedwarn and is
   truncated.  Returns a pointer just past the last digit consumed.  */
static const uchar *
convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
	     struct _cpp_strbuf *tbuf, bool wide)
{
  size_t count = 0;
  cppchar_t c, n = 0;
  size_t width = (wide ? CPP_OPTION (pfile, wchar_precision)
		  : CPP_OPTION (pfile, char_precision));
  size_t mask = width_to_mask (width);

  /* No overflow tracking is needed: three octal digits yield at most
     nine bits, and the range check below compares against MASK.  */
  while (from < limit && count++ < 3)
    {
      c = *from;
      if (c < '0' || c > '7')
	break;
      from++;
      n = (n << 3) + c - '0';
    }

  if (n != (n & mask))
    {
      cpp_error (pfile, CPP_DL_PEDWARN,
		 "octal escape sequence out of range");
      n &= mask;
    }

  emit_numeric_escape (pfile, n, tbuf, wide);

  return from;
}
/* Convert one escape sequence.  FROM points at the character after the
   backslash and LIMIT bounds the text.  UCN, hex and octal escapes are
   handed to their own routines; every other escape is reduced to a
   single host character, which is then converted to the narrow or wide
   execution character set (selected by WIDE) and appended to TBUF.
   Returns a pointer just past the escape.  */
static const uchar *
convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
struct _cpp_strbuf *tbuf, bool wide)
{
/* Host character codes for the letter escapes, indexed in the order
   \a \b \e \f \n \r \t \v.  */
#if HOST_CHARSET == HOST_CHARSET_ASCII
static const uchar charconsts[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
#elif HOST_CHARSET == HOST_CHARSET_EBCDIC
static const uchar charconsts[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
#else
#error "unknown host character set"
#endif
uchar c;
struct cset_converter cvt
= wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
c = *from;
switch (c)
{
/* These three kinds return directly from their own converters.  */
case 'u': case 'U':
return convert_ucn (pfile, from, limit, tbuf, wide);
case 'x':
return convert_hex (pfile, from, limit, tbuf, wide);
break;
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
return convert_oct (pfile, from, limit, tbuf, wide);
/* These stand for themselves.  */
case '\\': case '\'': case '"': case '?': break;
/* Accepted as themselves unless pedantic, in which case they are
   diagnosed as unknown escapes.  */
case '(': case '{': case '[': case '%':
if (CPP_PEDANTIC (pfile))
goto unknown;
break;
case 'b': c = charconsts[1]; break;
case 'f': c = charconsts[3]; break;
case 'n': c = charconsts[4]; break;
case 'r': c = charconsts[5]; break;
case 't': c = charconsts[6]; break;
case 'v': c = charconsts[7]; break;
case 'a':
if (CPP_WTRADITIONAL (pfile))
cpp_error (pfile, CPP_DL_WARNING,
"the meaning of '\\a' is different in traditional C");
c = charconsts[0];
break;
case 'e': case 'E':
if (CPP_PEDANTIC (pfile))
cpp_error (pfile, CPP_DL_PEDWARN,
"non-ISO-standard escape sequence, '\\%c'", (int) c);
c = charconsts[2];
break;
/* Anything else is diagnosed and then passed through unchanged.  */
default:
unknown:
if (ISGRAPH (c))
cpp_error (pfile, CPP_DL_PEDWARN,
"unknown escape sequence '\\%c'", (int) c);
else
cpp_error (pfile, CPP_DL_PEDWARN,
"unknown escape sequence: '\\%03o'", (int) c);
}
/* Convert the resulting host character to the execution character
   set.  */
if (!APPLY_CONVERSION (cvt, &c, 1, tbuf))
cpp_errno (pfile, CPP_DL_ERROR,
"converting escape sequence to execution character set");
return from + 1;
}
/* Interpret the COUNT string literals in the array FROM, concatenating
   them into one string in the execution character set.

   Each literal is given with its quotes (and optional leading 'L').
   Escape sequences are interpreted and ordinary text is converted with
   the narrow or wide converter according to WIDE.  When PASCAL_P is
   set, a leading "\p" in each literal is skipped and the result is
   prefixed with a length field.  The result is terminated with a zero
   character and stored in TO as a freshly allocated buffer.

   Returns true on success.  If converting a run of ordinary text
   fails, reports the error, frees the buffer and returns false.  */
bool
cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
cpp_string *to, bool wide, bool pascal_p)
{
struct _cpp_strbuf tbuf;
const uchar *p, *base, *limit;
size_t i;
size_t width = CPP_OPTION (pfile, wchar_precision);
size_t cwidth = CPP_OPTION (pfile, char_precision);
/* Largest length value used in the wide Pascal check below, and the
   number of bytes reserved for the Pascal length field.  */
size_t pascal_string_max_length = width_to_mask (wide ? width : cwidth);
size_t pascal_string_length_byte_size = ((wide ? width : cwidth)/cwidth);
struct cset_converter cvt
= wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
tbuf.text = xmalloc (tbuf.asize);
/* For Pascal strings, leave room at the front for the length field.  */
tbuf.len = (pascal_p ? pascal_string_length_byte_size : 0);
for (i = 0; i < count; i++)
{
p = from[i].text;
/* Skip the optional 'L' prefix and the opening quote; LIMIT stops
   short of the closing quote.  */
if (*p == 'L') p++;
p++;
limit = from[i].text + from[i].len - 1;
/* Skip the "\p" marker of a Pascal string.  */
if (pascal_p && p[0] == '\\' && p[1] == 'p')
p += 2;
for (;;)
{
/* Convert a run of ordinary characters in one go.  */
base = p;
while (p < limit && *p != '\\')
p++;
if (p > base)
{
if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
goto fail;
}
if (p == limit)
break;
/* P is at a backslash: convert the escape that follows it.  */
p = convert_escape (pfile, p + 1, limit, &tbuf, wide);
}
}
if (pascal_p)
{
if (wide)
{
/* The converter writes at tbuf.len, so temporarily rewind it to put
   the length at the start of the buffer, then restore it.  */
size_t saved_tbuf_len = tbuf.len;
unsigned char uclen = (unsigned char) (saved_tbuf_len/pascal_string_length_byte_size - 1);
tbuf.len = 0;
APPLY_CONVERSION (cvt, &uclen, 1, &tbuf);
tbuf.len = saved_tbuf_len;
if (tbuf.len/pascal_string_length_byte_size > pascal_string_max_length)
cpp_error (pfile, CPP_DL_ERROR, "Pascal string is too long");
}
else
{
/* tbuf.len includes the length byte itself, hence the -1 and the
   limit of 256.  */
*tbuf.text = (unsigned char) (tbuf.len - 1);
if (tbuf.len > 256)
cpp_error (pfile, CPP_DL_ERROR, "Pascal string is too long");
}
}
/* Append the terminating zero character, trim the buffer to size and
   hand it over to TO.  */
emit_numeric_escape (pfile, 0, &tbuf, wide);
tbuf.text = xrealloc (tbuf.text, tbuf.len);
to->text = tbuf.text;
to->len = tbuf.len;
return true;
fail:
cpp_errno (pfile, CPP_DL_ERROR, "converting to execution character set");
free (tbuf.text);
return false;
}
/* Like cpp_interpret_string, but with PFILE's narrow converter
   temporarily replaced by the identity conversion, so narrow strings
   are not translated to the execution character set.  Only the narrow
   converter is overridden; the wide converter is left as it is.  The
   original narrow converter is restored before returning.  */
bool
cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from,
size_t count, cpp_string *to, bool wide,
bool pascal_p)
{
struct cset_converter save_narrow_cset_desc = pfile->narrow_cset_desc;
bool retval;
pfile->narrow_cset_desc.func = convert_no_conversion;
pfile->narrow_cset_desc.cd = (iconv_t) -1;
retval = cpp_interpret_string (pfile, from, count, to, wide, pascal_p);
pfile->narrow_cset_desc = save_narrow_cset_desc;
return retval;
}
/* Compute the value of a narrow character constant from STR, its
   already-interpreted text including the terminating zero byte.

   Characters are packed most-significant first, char_precision bits
   each.  Stores the number of characters in *PCHARS_SEEN (clamped to
   the number that fit in an int) and whether the constant is unsigned
   in *UNSIGNEDP.  A single character takes the signedness of plain
   char; a multi-character constant is a signed int.  The returned
   value is truncated to its natural width and sign- or zero-extended
   to the width of cppchar_t.  */
static cppchar_t
narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
			 unsigned int *pchars_seen, int *unsignedp)
{
  size_t width = CPP_OPTION (pfile, char_precision);
  size_t max_chars = CPP_OPTION (pfile, int_precision) / width;
  size_t mask = width_to_mask (width);
  size_t i;
  cppchar_t result, c;
  bool unsigned_p;

  /* The final byte of STR is the terminator, not part of the value.  */
  result = 0;
  for (i = 0; i < str.len - 1; i++)
    {
      c = str.text[i] & mask;
      if (width < BITS_PER_CPPCHAR_T)
	result = (result << width) | c;
      else
	result = c;
    }

  if (i > max_chars)
    {
      i = max_chars;
      cpp_error (pfile, CPP_DL_WARNING,
		 "character constant too long for its type");
    }
  else if ((i == 4 && CPP_OPTION (pfile, warn_four_char_constants))
	   || (i > 1 && i != 4 && CPP_OPTION (pfile, warn_multichar)))
    cpp_error (pfile, CPP_DL_WARNING, "multi-character character constant");

  /* Multi-character constants have type int and are therefore signed.  */
  if (i > 1)
    unsigned_p = 0;
  else
    unsigned_p = CPP_OPTION (pfile, unsigned_char);

  /* Truncate to the natural width (char for one character, int for
     several) and extend to the full width of cppchar_t.  */
  if (i > 1)
    width = CPP_OPTION (pfile, int_precision);
  if (width < BITS_PER_CPPCHAR_T)
    {
      mask = ((cppchar_t) 1 << width) - 1;
      /* Build the sign bit in cppchar_t, like MASK above: an int shift
	 overflows when int is narrower than cppchar_t.  */
      if (unsigned_p || !(result & ((cppchar_t) 1 << (width - 1))))
	result &= mask;
      else
	result |= ~mask;
    }
  *pchars_seen = i;
  *unsignedp = unsigned_p;
  return result;
}
/* Compute the value of a wide character constant from STR, its
   already-interpreted text including the terminating zero character.

   Only the last wide character before the terminator contributes to
   the value; if anything precedes it, a warning is issued.  Always
   stores 1 in *PCHARS_SEEN, and the unsigned_wchar option in
   *UNSIGNEDP.  The returned value is truncated to wchar_precision bits
   and sign- or zero-extended to the width of cppchar_t.  */
static cppchar_t
wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
		       unsigned int *pchars_seen, int *unsignedp)
{
  bool bigend = CPP_OPTION (pfile, bytes_big_endian);
  size_t width = CPP_OPTION (pfile, wchar_precision);
  size_t cwidth = CPP_OPTION (pfile, char_precision);
  size_t mask = width_to_mask (width);
  size_t cmask = width_to_mask (cwidth);
  size_t nbwc = width / cwidth;
  size_t off, i;
  cppchar_t result = 0, c;

  /* Locate the last wide character: step back over the terminator and
     over the character itself, NBWC bytes each.  */
  off = str.len - (nbwc * 2);
  result = 0;
  for (i = 0; i < nbwc; i++)
    {
      /* Read the bytes most-significant first in the target order.  */
      c = bigend ? str.text[off + i] : str.text[off + nbwc - i - 1];
      result = (result << cwidth) | (c & cmask);
    }

  /* Anything before that character means the constant held more than
     one wide character.  */
  if (off > 0)
    cpp_error (pfile, CPP_DL_WARNING,
	       "character constant too long for its type");

  /* Truncate to WIDTH bits and extend to the full width of cppchar_t.  */
  if (width < BITS_PER_CPPCHAR_T)
    {
      /* Build the sign bit in cppchar_t: an int shift overflows when
	 int is narrower than cppchar_t.  */
      if (CPP_OPTION (pfile, unsigned_wchar)
	  || !(result & ((cppchar_t) 1 << (width - 1))))
	result &= mask;
      else
	result |= ~mask;
    }

  *unsignedp = CPP_OPTION (pfile, unsigned_wchar);
  *pchars_seen = 1;
  return result;
}
/* Return the value of the character constant TOKEN (CPP_WCHAR means
   wide, anything else narrow).  The number of characters seen is
   stored in *PCHARS_SEEN and the signedness in *UNSIGNEDP by the
   narrow or wide helper.  An empty constant is reported as an error;
   in that case, and when the text cannot be interpreted, 0 is returned
   and the output parameters are left untouched.  */
cppchar_t
cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
			 unsigned int *pchars_seen, int *unsignedp)
{
  cpp_string str = { 0, 0 };
  bool wide = (token->type == CPP_WCHAR);
  cppchar_t result;

  /* An empty constant is spelled '' or L'': just the quotes, plus the
     prefix when wide.  */
  if (token->val.str.len == (size_t) (2 + wide))
    {
      cpp_error (pfile, CPP_DL_ERROR, "empty character constant");
      return 0;
    }

  if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, wide, false))
    return 0;

  result = (wide
	    ? wide_str_to_charconst (pfile, str, pchars_seen, unsignedp)
	    : narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp));

  /* Release the interpreted text unless it is the token's own.  */
  if (str.text != token->val.str.text)
    free ((void *)str.text);

  return result;
}
/* Look up, creating it if necessary (HT_ALLOC), the hash node for the
   identifier spelled by the LEN bytes at ID, after replacing each UCN
   escape in the spelling with its UTF-8 encoding.  A UCN for 0x24 is
   replaced by a literal '$'.  If a UCN cannot be encoded, an error is
   reported and the identifier is cut off at that point.  */
cpp_hashnode *
_cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
{
/* The converted spelling is built in a stack buffer of LEN + 1 bytes.
   This relies on a UCN's UTF-8 form (at most 6 bytes) never being
   longer than its escape (6 or 10 bytes when complete).  */
uchar * buf = alloca (len + 1);
uchar * bufp = buf;
size_t idp;
for (idp = 0; idp < len; idp++)
if (id[idp] != '\\')
*bufp++ = id[idp];
else
{
/* A backslash here is taken to start \u (4 digits) or \U (8 digits);
   presumably the caller has validated this already -- the character
   after the backslash is read without a bounds check.  */
unsigned length = id[idp+1] == 'u' ? 4 : 8;
cppchar_t value = 0;
size_t bufleft = len - (bufp - buf);
int rval;
/* Skip the backslash and letter, then read the hex digits.  */
idp += 2;
while (length && idp < len && ISXDIGIT (id[idp]))
{
value = (value << 4) + hex_value (id[idp]);
idp++;
length--;
}
/* Step back so that the loop's own increment lands just past the
   escape.  */
idp--;
if (value == 0x24)
{
*bufp++ = '$';
continue;
}
rval = one_cppchar_to_utf8 (value, &bufp, &bufleft);
if (rval)
{
errno = rval;
cpp_errno (pfile, CPP_DL_ERROR,
"converting UCN to source character set");
break;
}
}
return CPP_HASHNODE (ht_lookup (pfile->hash_table,
buf, bufp - buf, HT_ALLOC));
}
/* Convert the contents of an input file from INPUT_CHARSET to the
   source character set.

   INPUT is an allocated buffer of SIZE bytes, of which the first LEN
   hold the file contents.  When no conversion is needed INPUT is
   reused; otherwise the text is converted into a new buffer and INPUT
   is freed.  A conversion failure is reported through PFILE, and
   whatever was converted is still returned.

   The returned buffer has one extra byte beyond the text, holding a
   line terminator: '\r' if the text ends in '\r', '\n' otherwise
   (including when the text is empty).  The length of the text, not
   counting that terminator, is stored in *ST_SIZE.  */
uchar *
_cpp_convert_input (cpp_reader *pfile, const char *input_charset,
		    uchar *input, size_t size, size_t len, off_t *st_size)
{
  struct cset_converter input_cset;
  struct _cpp_strbuf to;

  input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
  if (input_cset.func == convert_no_conversion)
    {
      to.text = input;
      to.asize = size;
      to.len = len;
    }
  else
    {
      to.asize = MAX (65536, len);
      to.text = xmalloc (to.asize);
      to.len = 0;

      if (!APPLY_CONVERSION (input_cset, input, len, &to))
	cpp_error (pfile, CPP_DL_ERROR,
		   "failure to convert %s to %s",
		   CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);

      free (input);
    }

  /* The converter was created just for this file; release it.  */
  if (input_cset.func == convert_using_iconv)
    iconv_close (input_cset.cd);

  /* Resize the buffer if far too much was allocated, or if there is no
     room for the terminator.  */
  if (to.len + 4096 < to.asize || to.len >= to.asize)
    to.text = xrealloc (to.text, to.len + 1);

  /* A text ending in a bare '\r' is terminated with another '\r'
     rather than '\n', so that the two are not read as one "\r\n" line
     ending.  An empty text has no last byte to examine.  */
  if (to.len > 0 && to.text[to.len - 1] == '\r')
    to.text[to.len] = '\r';
  else
    to.text[to.len] = '\n';

  *st_size = to.len;
  return to.text;
}
/* Return the name of the default input encoding.  The locale-based
   lookup is compiled out (note the "&& 0" in the condition), so as
   written this always returns SOURCE_CHARSET.  */
const char *
_cpp_default_encoding (void)
{
const char *current_encoding = NULL;
/* Disabled: would take the encoding from the LC_CTYPE locale.  */
#if defined (HAVE_LOCALE_H) && defined (HAVE_LANGINFO_CODESET) && 0
setlocale (LC_CTYPE, "");
current_encoding = nl_langinfo (CODESET);
#endif
/* Fall back to the source character set when nothing was found.  */
if (current_encoding == NULL || *current_encoding == '\0')
current_encoding = SOURCE_CHARSET;
return current_encoding;
}