#include "config.h"
#include "Lexer.h"
#include "JSFunction.h"
#include "JSGlobalObjectFunctions.h"
#include "NodeInfo.h"
#include "Nodes.h"
#include "dtoa.h"
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/Assertions.h>
using namespace WTF;
using namespace Unicode;
using namespace JSC;
#ifndef KDE_USE_FINAL
#include "Grammar.h"
#endif
#include "Lookup.h"
#include "Lexer.lut.h"
int jscyylex(void* lvalp, void* llocp, void* globalData)
{
return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
}
namespace JSC {
// Code unit U+FEFF (byte order mark). setCode() looks for it in the source and,
// when found, lexes from a copy made by copyCodeWithoutBOMs() that omits it.
static const UChar byteOrderMark = 0xFEFF;
// Creates a lexer tied to globalData that looks keywords up in JSC::mainTable.
// Both scratch buffers (8-bit for numbers, 16-bit for strings/identifiers)
// get their initial capacity reserved up front.
Lexer::Lexer(JSGlobalData* globalData)
    : m_isReparsing(false)
    , m_globalData(globalData)
    , m_keywordTable(JSC::mainTable)
{
    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
}
// Tears the lexer down; the only explicit work is calling deleteTable() on
// the keyword table held in m_keywordTable.
Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}
// Returns the address of the character currently held in m_current.
// m_code runs four positions ahead of it because the lexer keeps a
// four-character window (m_current, m_next1, m_next2, m_next3).
inline const UChar* Lexer::currentCharacter() const
{
    const int lookaheadWindow = 4;
    return m_code - lookaheadWindow;
}
// Returns the distance, in characters, from m_codeStart to the current character.
inline int Lexer::currentOffset() const
{
    return static_cast<int>(currentCharacter() - m_codeStart);
}
// Advances the four-character window by one position. The slot that falls
// past the end of the input is filled with -1.
ALWAYS_INLINE void Lexer::shift1()
{
    m_current = m_next1;
    m_next1 = m_next2;
    m_next2 = m_next3;
    m_next3 = LIKELY(m_code < m_codeEnd) ? m_code[0] : -1;
    ++m_code;
}
// Advances the four-character window by two positions. Slots that fall past
// the end of the input are filled with -1.
ALWAYS_INLINE void Lexer::shift2()
{
    m_current = m_next2;
    m_next1 = m_next3;

    // Common case: both refilled slots are still inside the input.
    if (LIKELY(m_code + 1 < m_codeEnd)) {
        m_next2 = m_code[0];
        m_next3 = m_code[1];
        m_code += 2;
        return;
    }

    // Near the end of input: at most one real character is left.
    m_next3 = -1;
    if (m_code < m_codeEnd)
        m_next2 = m_code[0];
    else
        m_next2 = -1;
    m_code += 2;
}
// Advances the four-character window by three positions. Slots that fall
// past the end of the input are filled with -1.
ALWAYS_INLINE void Lexer::shift3()
{
    m_current = m_next3;

    // Common case: all three refilled slots are still inside the input.
    if (LIKELY(m_code + 2 < m_codeEnd)) {
        m_next1 = m_code[0];
        m_next2 = m_code[1];
        m_next3 = m_code[2];
        m_code += 3;
        return;
    }

    // Near the end of input: the last slot is always empty, the others maybe.
    m_next3 = -1;
    m_next2 = (m_code + 1 < m_codeEnd) ? m_code[1] : -1;
    m_next1 = (m_code < m_codeEnd) ? m_code[0] : -1;
    m_code += 3;
}
// Replaces the whole four-character window with the next four characters
// of input. Slots that fall past the end of the input are filled with -1.
ALWAYS_INLINE void Lexer::shift4()
{
    // Common case: four more characters are available.
    if (LIKELY(m_code + 3 < m_codeEnd)) {
        m_current = m_code[0];
        m_next1 = m_code[1];
        m_next2 = m_code[2];
        m_next3 = m_code[3];
        m_code += 4;
        return;
    }

    // Near the end of input: the last slot is always empty, the others maybe.
    m_next3 = -1;
    m_next2 = (m_code + 2 < m_codeEnd) ? m_code[2] : -1;
    m_next1 = (m_code + 1 < m_codeEnd) ? m_code[1] : -1;
    m_current = (m_code < m_codeEnd) ? m_code[0] : -1;
    m_code += 4;
}
void Lexer::setCode(const SourceCode& source)
{
m_lineNumber = source.firstLine();
m_delimited = false;
m_lastToken = -1;
const UChar* data = source.provider()->data();
m_source = &source;
m_codeStart = data;
m_code = data + source.startOffset();
m_codeEnd = data + source.endOffset();
m_error = false;
m_atLineStart = true;
if (source.provider()->hasBOMs()) {
for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
if (UNLIKELY(*p == byteOrderMark)) {
copyCodeWithoutBOMs();
break;
}
}
}
shift4();
ASSERT(currentOffset() == source.startOffset());
}
void Lexer::copyCodeWithoutBOMs()
{
m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
for (const UChar* p = m_code; p < m_codeEnd; ++p) {
UChar c = *p;
if (c != byteOrderMark)
m_codeWithoutBOMs.append(c);
}
ptrdiff_t startDelta = m_codeStart - m_code;
m_code = m_codeWithoutBOMs.data();
m_codeStart = m_code + startDelta;
m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
}
// Consumes the line terminator at m_current and bumps the line counter.
// A '\r' '\n' pair in either order is consumed as a single terminator.
void Lexer::shiftLineTerminator()
{
    ASSERT(isLineTerminator(m_current));

    // The sum matches exactly the two mixed pairs ("\r\n" and "\n\r"),
    // given that m_current is already known to be a line terminator.
    const bool isTwoCharacterTerminator = (m_current + m_next1 == '\n' + '\r');

    ++m_lineNumber;
    if (isTwoCharacterTerminator)
        shift2();
    else
        shift1();
}
// Builds an Identifier from `length` characters at `characters`, stores it in
// m_identifiers, and returns a pointer to the stored element.
ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
{
    m_identifiers.append(Identifier(m_globalData, characters, length));
    Identifier& stored = m_identifiers.last();
    return &stored;
}
// True when the previously returned token was continue, break, return or
// throw. lex() uses this to emit a ';' when a line terminator follows one
// of these keywords.
inline bool Lexer::lastTokenWasRestrKeyword() const
{
    switch (m_lastToken) {
    case CONTINUE:
    case BREAK:
    case RETURN:
    case THROW:
        return true;
    default:
        return false;
    }
}
// Slow path of isIdentStart(): accepts c when its Unicode category is one
// of the letter categories listed below.
static NEVER_INLINE bool isNonASCIIIdentStart(int c)
{
    const bool isLetter = category(c) & (Letter_Uppercase
        | Letter_Lowercase
        | Letter_Titlecase
        | Letter_Modifier
        | Letter_Other);
    return isLetter;
}
// Whether c may begin an identifier: ASCII letters, '$' and '_' are handled
// inline; anything outside ASCII is delegated to isNonASCIIIdentStart().
static inline bool isIdentStart(int c)
{
    if (!isASCII(c))
        return isNonASCIIIdentStart(c);
    return c == '$' || c == '_' || isASCIIAlpha(c);
}
// Slow path of isIdentPart(): accepts c when its Unicode category is a
// letter category, a non-spacing or spacing-combining mark, a decimal digit,
// or connector punctuation.
static NEVER_INLINE bool isNonASCIIIdentPart(int c)
{
    const bool isAllowed = category(c) & (Letter_Uppercase
        | Letter_Lowercase
        | Letter_Titlecase
        | Letter_Modifier
        | Letter_Other
        | Mark_NonSpacing
        | Mark_SpacingCombining
        | Number_DecimalDigit
        | Punctuation_Connector);
    return isAllowed;
}
// Whether c may continue an identifier: ASCII letters, digits, '$' and '_'
// are handled inline; anything outside ASCII is delegated to
// isNonASCIIIdentPart().
static inline bool isIdentPart(int c)
{
    if (!isASCII(c))
        return isNonASCIIIdentPart(c);
    return c == '$' || c == '_' || isASCIIAlphanumeric(c);
}
// Maps the character following a backslash in a string literal to the
// character it denotes. The six control escapes (\b \t \n \v \f \r) map to
// 0x08, 0x09, 0x0A, 0x0B, 0x0C and 0x0D; every other value is returned as is.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
// Appends c to the 8-bit scratch buffer. c is expected to be in [0, 0xFF].
inline void Lexer::record8(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= 0xFF);
    const char byte = static_cast<char>(c);
    m_buffer8.append(byte);
}
// Appends one UTF-16 code unit to the 16-bit scratch buffer.
inline void Lexer::record16(UChar c)
{
    m_buffer16.append(c);
}
inline void Lexer::record16(int c)
{
ASSERT(c >= 0);
ASSERT(c <= USHRT_MAX);
record16(UChar(static_cast<unsigned short>(c)));
}
// Scans and returns the next token from the source installed by setCode().
//
// p1 is used as a YYSTYPE* and receives the token's semantic value (ident for
// identifiers/keywords/strings, doubleValue for numbers, intValue for braces).
// p2 is used as a YYLTYPE* and receives the token's line and its start/end offsets.
//
// Returns the token code, 0 at end of input, or -1 (after setting m_error) on a
// lexical error. The function is a goto-driven state machine; each label below
// is one scanner state.
int Lexer::lex(void* p1, void* p2)
{
ASSERT(!m_error);
ASSERT(m_buffer8.isEmpty());
ASSERT(m_buffer16.isEmpty());
YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
int token = 0;
// Becomes true once a line terminator has been crossed before this token.
m_terminator = false;
start:
// Skip whitespace between tokens.
while (isWhiteSpace(m_current))
shift1();
// Offset at which the token about to be scanned begins.
int startOffset = currentOffset();
if (m_current == -1) {
// End of input: emit one final ';' unless a line terminator was just seen,
// the previous token already delimited the statement, or we are reparsing.
if (!m_terminator && !m_delimited && !m_isReparsing) {
token = ';';
goto doneSemicolon;
}
return 0;
}
m_delimited = false;
// Punctuators are matched longest-first using the three lookahead characters.
switch (m_current) {
case '>':
if (m_next1 == '>' && m_next2 == '>') {
if (m_next3 == '=') {
shift4();
token = URSHIFTEQUAL;
break;
}
shift3();
token = URSHIFT;
break;
}
if (m_next1 == '>') {
if (m_next2 == '=') {
shift3();
token = RSHIFTEQUAL;
break;
}
shift2();
token = RSHIFT;
break;
}
if (m_next1 == '=') {
shift2();
token = GE;
break;
}
shift1();
token = '>';
break;
case '=':
if (m_next1 == '=') {
if (m_next2 == '=') {
shift3();
token = STREQ;
break;
}
shift2();
token = EQEQ;
break;
}
shift1();
token = '=';
break;
case '!':
if (m_next1 == '=') {
if (m_next2 == '=') {
shift3();
token = STRNEQ;
break;
}
shift2();
token = NE;
break;
}
shift1();
token = '!';
break;
case '<':
// "<!--" is treated as the start of a single-line comment.
if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
shift4();
goto inSingleLineComment;
}
if (m_next1 == '<') {
if (m_next2 == '=') {
shift3();
token = LSHIFTEQUAL;
break;
}
shift2();
token = LSHIFT;
break;
}
if (m_next1 == '=') {
shift2();
token = LE;
break;
}
shift1();
token = '<';
break;
case '+':
if (m_next1 == '+') {
shift2();
// A "++" that follows a line terminator is reported as AUTOPLUSPLUS.
if (m_terminator) {
token = AUTOPLUSPLUS;
break;
}
token = PLUSPLUS;
break;
}
if (m_next1 == '=') {
shift2();
token = PLUSEQUAL;
break;
}
shift1();
token = '+';
break;
case '-':
if (m_next1 == '-') {
// "-->" is treated as a single-line comment, but only while m_atLineStart is set.
if (m_atLineStart && m_next2 == '>') {
shift3();
goto inSingleLineComment;
}
shift2();
// A "--" that follows a line terminator is reported as AUTOMINUSMINUS.
if (m_terminator) {
token = AUTOMINUSMINUS;
break;
}
token = MINUSMINUS;
break;
}
if (m_next1 == '=') {
shift2();
token = MINUSEQUAL;
break;
}
shift1();
token = '-';
break;
case '*':
if (m_next1 == '=') {
shift2();
token = MULTEQUAL;
break;
}
shift1();
token = '*';
break;
case '/':
if (m_next1 == '/') {
shift2();
goto inSingleLineComment;
}
// The "/*" itself is consumed at the inMultiLineComment label.
if (m_next1 == '*')
goto inMultiLineComment;
if (m_next1 == '=') {
shift2();
token = DIVEQUAL;
break;
}
shift1();
token = '/';
break;
case '&':
if (m_next1 == '&') {
shift2();
token = AND;
break;
}
if (m_next1 == '=') {
shift2();
token = ANDEQUAL;
break;
}
shift1();
token = '&';
break;
case '^':
if (m_next1 == '=') {
shift2();
token = XOREQUAL;
break;
}
shift1();
token = '^';
break;
case '%':
if (m_next1 == '=') {
shift2();
token = MODEQUAL;
break;
}
shift1();
token = '%';
break;
case '|':
if (m_next1 == '=') {
shift2();
token = OREQUAL;
break;
}
if (m_next1 == '|') {
shift2();
token = OR;
break;
}
shift1();
token = '|';
break;
case '.':
// A '.' directly followed by a digit starts a numeric literal such as ".5".
if (isASCIIDigit(m_next1)) {
record8('.');
shift1();
goto inNumberAfterDecimalPoint;
}
token = '.';
shift1();
break;
// Single-character punctuators whose token code is the character itself.
case ',':
case '~':
case '?':
case ':':
case '(':
case ')':
case '[':
case ']':
token = m_current;
shift1();
break;
case ';':
shift1();
m_delimited = true;
token = ';';
break;
// Braces carry their source offset as the semantic value.
case '{':
lvalp->intValue = currentOffset();
shift1();
token = OPENBRACE;
break;
case '}':
lvalp->intValue = currentOffset();
shift1();
m_delimited = true;
token = CLOSEBRACE;
break;
case '\\':
goto startIdentifierWithBackslash;
case '0':
goto startNumberWithZeroDigit;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
goto startNumber;
case '"':
case '\'':
goto startString;
default:
if (isIdentStart(m_current))
goto startIdentifierOrKeyword;
if (isLineTerminator(m_current)) {
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
// A line terminator right after continue/break/return/throw yields a ';' token.
if (lastTokenWasRestrKeyword()) {
token = ';';
goto doneSemicolon;
}
goto start;
}
// Any other character cannot start a token.
goto returnError;
}
// Common exit for every punctuator matched in the switch above.
m_atLineStart = false;
goto returnToken;
startString: {
// Remember which quote opened the literal, then step past it.
int stringQuoteCharacter = m_current;
shift1();
const UChar* stringStart = currentCharacter();
// Fast path: scan for the closing quote without copying any characters.
while (m_current != stringQuoteCharacter) {
// Cheap conservative filter: besides backslash, the bit test fires for -1, '\n',
// '\r', U+2028 and U+2029 (and for some other values too). Anything caught here is
// re-examined precisely on the buffered inString path.
if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
// Carry over what has been scanned so far, then continue in buffered mode.
m_buffer16.append(stringStart, currentCharacter() - stringStart);
goto inString;
}
shift1();
}
// No special characters: build the identifier straight from the source text.
lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
// Step past the closing quote.
shift1();
m_atLineStart = false;
m_delimited = false;
token = STRING;
goto returnToken;
inString:
// Buffered path: characters are accumulated in m_buffer16.
while (m_current != stringQuoteCharacter) {
if (m_current == '\\')
goto inStringEscapeSequence;
// An unescaped line terminator or the end of input inside a string is an error.
if (UNLIKELY(isLineTerminator(m_current)))
goto returnError;
if (UNLIKELY(m_current == -1))
goto returnError;
record16(m_current);
shift1();
}
goto doneString;
inStringEscapeSequence:
// Step past the backslash.
shift1();
if (m_current == 'x') {
shift1();
// \xHH with exactly two hex digits.
if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
record16(convertHex(m_current, m_next1));
shift2();
goto inString;
}
// Not followed by two hex digits: the escape stands for a literal 'x'.
record16('x');
if (m_current == stringQuoteCharacter)
goto doneString;
goto inString;
}
if (m_current == 'u') {
shift1();
// \uHHHH with exactly four hex digits.
if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
shift4();
goto inString;
}
// "\u" immediately before the closing quote stands for a literal 'u';
// any other malformed \u escape is an error.
if (m_current == stringQuoteCharacter) {
record16('u');
goto doneString;
}
goto returnError;
}
if (isASCIIOctalDigit(m_current)) {
// Octal escapes: three digits only when the first is 0-3, otherwise two, otherwise one.
if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
shift3();
goto inString;
}
if (isASCIIOctalDigit(m_next1)) {
record16((m_current - '0') * 8 + m_next1 - '0');
shift2();
goto inString;
}
record16(m_current - '0');
shift1();
goto inString;
}
// Backslash followed by a line terminator: the terminator is consumed and adds nothing
// to the string value.
if (isLineTerminator(m_current)) {
shiftLineTerminator();
goto inString;
}
// Everything else goes through singleEscape() (\b \t \n \v \f \r, or the character itself).
record16(singleEscape(m_current));
shift1();
goto inString;
}
startIdentifierWithBackslash:
// An identifier may begin with a \uHHHH escape; the decoded character must be a
// valid identifier start.
shift1();
if (UNLIKELY(m_current != 'u'))
goto returnError;
shift1();
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
goto returnError;
// `token` is used as scratch storage for the decoded character here.
token = convertUnicode(m_current, m_next1, m_next2, m_next3);
if (UNLIKELY(!isIdentStart(token)))
goto returnError;
goto inIdentifierAfterCharacterCheck;
startIdentifierOrKeyword: {
// Fast path: scan the identifier in place without copying.
const UChar* identifierStart = currentCharacter();
shift1();
while (isIdentPart(m_current))
shift1();
if (LIKELY(m_current != '\\')) {
lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
goto doneIdentifierOrKeyword;
}
// A backslash follows: carry over what was scanned and continue in buffered mode.
m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
}
// Buffered path: each iteration handles one \uHHHH escape and the plain
// identifier characters that follow it.
do {
shift1();
if (UNLIKELY(m_current != 'u'))
goto returnError;
shift1();
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
goto returnError;
token = convertUnicode(m_current, m_next1, m_next2, m_next3);
if (UNLIKELY(!isIdentPart(token)))
goto returnError;
// Entry point for identifiers that start with an escape (already validated above).
inIdentifierAfterCharacterCheck:
record16(token);
shift4();
while (isIdentPart(m_current)) {
record16(m_current);
shift1();
}
} while (UNLIKELY(m_current == '\\'));
// Identifiers containing escapes skip the keyword lookup and are always IDENT.
goto doneIdentifier;
inSingleLineComment:
// Skip to the end of the line; a comment that runs to the end of input ends the token stream.
while (!isLineTerminator(m_current)) {
if (UNLIKELY(m_current == -1))
return 0;
shift1();
}
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
if (lastTokenWasRestrKeyword())
goto doneSemicolon;
goto start;
inMultiLineComment:
// Step past the opening "/*".
shift2();
// NOTE(review): line terminators inside the comment advance the line counter but do
// not set m_terminator — confirm this is intended for the m_terminator-based checks above.
while (m_current != '*' || m_next1 != '/') {
if (isLineTerminator(m_current))
shiftLineTerminator();
else {
shift1();
// Reaching the end of input before "*/" is an error.
if (UNLIKELY(m_current == -1))
goto returnError;
}
}
// Step past the closing "*/".
shift2();
m_atLineStart = false;
goto start;
startNumberWithZeroDigit:
// The leading '0' is consumed without being recorded.
shift1();
// 0x / 0X followed by at least one hex digit: hexadecimal literal.
if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
shift1();
goto inHex;
}
if (m_current == '.') {
record8('0');
record8('.');
shift1();
goto inNumberAfterDecimalPoint;
}
if ((m_current | 0x20) == 'e') {
record8('0');
record8('e');
shift1();
goto inExponentIndicator;
}
if (isASCIIOctalDigit(m_current))
goto inOctal;
// '0' followed by 8 or 9: handled as a decimal literal.
if (isASCIIDigit(m_current))
goto startNumber;
// A lone '0'.
lvalp->doubleValue = 0;
goto doneNumeric;
inNumberAfterDecimalPoint:
// Fraction digits, optionally followed by an exponent.
while (isASCIIDigit(m_current)) {
record8(m_current);
shift1();
}
if ((m_current | 0x20) == 'e') {
record8('e');
shift1();
goto inExponentIndicator;
}
goto doneNumber;
inExponentIndicator:
// Optional sign, then at least one digit is required.
if (m_current == '+' || m_current == '-') {
record8(m_current);
shift1();
}
if (!isASCIIDigit(m_current))
goto returnError;
do {
record8(m_current);
shift1();
} while (isASCIIDigit(m_current));
goto doneNumber;
inOctal: {
do {
record8(m_current);
shift1();
} while (isASCIIOctalDigit(m_current));
// An 8 or 9 after octal digits: continue as a decimal literal, keeping the
// digits already recorded in m_buffer8.
if (isASCIIDigit(m_current))
goto startNumber;
// Accumulate the value digit by digit in base 8.
double dval = 0;
const char* end = m_buffer8.end();
for (const char* p = m_buffer8.data(); p < end; ++p) {
dval *= 8;
dval += *p - '0';
}
// Large values are recomputed by parseIntOverflow() (presumably because the simple
// accumulation above may have lost precision — confirm against its definition).
if (dval >= mantissaOverflowLowerBound)
dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
m_buffer8.resize(0);
lvalp->doubleValue = dval;
goto doneNumeric;
}
inHex: {
do {
record8(m_current);
shift1();
} while (isASCIIHexDigit(m_current));
// Accumulate the value digit by digit in base 16.
double dval = 0;
const char* end = m_buffer8.end();
for (const char* p = m_buffer8.data(); p < end; ++p) {
dval *= 16;
dval += toASCIIHexValue(*p);
}
// Same large-value handling as in inOctal, with radix 16.
if (dval >= mantissaOverflowLowerBound)
dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
m_buffer8.resize(0);
lvalp->doubleValue = dval;
goto doneNumeric;
}
startNumber:
// Decimal literal: integer digits, then optional fraction and/or exponent.
record8(m_current);
shift1();
while (isASCIIDigit(m_current)) {
record8(m_current);
shift1();
}
if (m_current == '.') {
record8('.');
shift1();
goto inNumberAfterDecimalPoint;
}
if ((m_current | 0x20) == 'e') {
record8('e');
shift1();
goto inExponentIndicator;
}
doneNumber:
// NUL-terminate the recorded text and convert it with strtod.
m_buffer8.append('\0');
lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
m_buffer8.resize(0);
doneNumeric:
// A numeric literal may not be immediately followed by an identifier start character.
if (UNLIKELY(isIdentStart(m_current)))
goto returnError;
m_atLineStart = false;
m_delimited = false;
token = NUMBER;
goto returnToken;
doneSemicolon:
// Shared exit for explicit and inserted semicolons.
token = ';';
m_delimited = true;
goto returnToken;
doneIdentifier:
// Identifier assembled in m_buffer16 (it contained at least one escape).
m_atLineStart = false;
m_delimited = false;
lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
m_buffer16.resize(0);
token = IDENT;
goto returnToken;
doneIdentifierOrKeyword: {
// Identifier scanned in place: look it up in the keyword table to pick the token.
m_atLineStart = false;
m_delimited = false;
m_buffer16.resize(0);
const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
token = entry ? entry->lexerValue() : IDENT;
goto returnToken;
}
doneString:
// Step past the closing quote and build the string from m_buffer16.
shift1();
m_atLineStart = false;
m_delimited = false;
lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
m_buffer16.resize(0);
token = STRING;
returnToken: {
// Report the location: both line fields get the current line number, and the
// "column" fields hold the token's start and end source offsets.
int lineNumber = m_lineNumber;
llocp->first_line = lineNumber;
llocp->last_line = lineNumber;
llocp->first_column = startOffset;
llocp->last_column = currentOffset();
m_lastToken = token;
return token;
}
returnError:
// Latch the error; the ASSERT at the top expects lex() not to be called again in this state.
m_error = true;
return -1;
}
// Scans a regular expression literal starting at the current character
// (the scan begins at m_current and runs to the closing '/').
//
// On success the pattern text is stored in m_pattern, the trailing flag
// characters in m_flags, and true is returned. If a line terminator or the
// end of input is reached before the closing '/', false is returned.
//
// m_buffer16 is always left empty on return, including the failure case, so
// the buffer-empty precondition asserted by lex() and by this function holds
// for whatever call comes next.
bool Lexer::scanRegExp()
{
    ASSERT(m_buffer16.isEmpty());

    bool lastWasEscape = false;
    bool inBrackets = false;

    while (true) {
        if (isLineTerminator(m_current) || m_current == -1) {
            // Unterminated literal: throw away the partial pattern rather
            // than leaving it behind in the shared scratch buffer.
            m_buffer16.resize(0);
            return false;
        }
        if (m_current != '/' || lastWasEscape || inBrackets) {
            // Track character classes: a '/' inside [...] does not end the literal.
            if (!lastWasEscape) {
                if (m_current == '[' && !inBrackets)
                    inBrackets = true;
                if (m_current == ']' && inBrackets)
                    inBrackets = false;
            }
            record16(m_current);
            // A backslash escapes the next character unless it is itself escaped.
            lastWasEscape = !lastWasEscape && m_current == '\\';
        } else {
            // Unescaped '/' outside brackets: end of the pattern.
            m_pattern = UString(m_buffer16);
            m_buffer16.resize(0);
            shift1();
            break;
        }
        shift1();
    }

    // Everything that can be part of an identifier is taken as a flag character.
    while (isIdentPart(m_current)) {
        record16(m_current);
        shift1();
    }
    m_flags = UString(m_buffer16);
    m_buffer16.resize(0);

    return true;
}
void Lexer::clear()
{
m_identifiers.clear();
m_codeWithoutBOMs.clear();
Vector<char> newBuffer8;
newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
m_buffer8.swap(newBuffer8);
Vector<UChar> newBuffer16;
newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
m_buffer16.swap(newBuffer16);
m_isReparsing = false;
m_pattern = UString();
m_flags = UString();
}
// Builds a SourceCode covering the text from the brace at offset openBrace
// through the brace at offset closeBrace (inclusive), starting at firstLine.
//
// openBrace and closeBrace are offsets as reported by lex(). When BOMs were
// stripped (m_codeWithoutBOMs is non-empty) those offsets refer to the
// stripped text, while the provider still holds the original text, so they
// are translated back to indices in the original data here.
SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
{
    if (m_codeWithoutBOMs.isEmpty())
        return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);

    ASSERT(openBrace < closeBrace);

    const UChar* data = m_source->provider()->data();
    const int sourceEnd = m_source->endOffset();

    // Walk the original text once. strippedOffset is the offset the current
    // non-BOM character has in the stripped text: copyCodeWithoutBOMs() keeps
    // the first lexed character at startOffset(), and every non-BOM character
    // after it advances the stripped offset by one.
    // If a brace is not found (not expected), the unadjusted offset is kept.
    int originalOpenBrace = openBrace;
    int originalCloseBrace = closeBrace;
    int strippedOffset = m_source->startOffset();
    for (int i = m_source->startOffset(); i < sourceEnd; ++i) {
        if (data[i] == byteOrderMark)
            continue;
        if (strippedOffset == openBrace)
            originalOpenBrace = i;
        if (strippedOffset == closeBrace) {
            originalCloseBrace = i;
            break;
        }
        ++strippedOffset;
    }

    return SourceCode(m_source->provider(), originalOpenBrace, originalCloseBrace + 1, firstLine);
}
}