#include "config.h"
#include "Lexer.h"
#include "JSFunctionInlines.h"
#include "BuiltinNames.h"
#include "JSGlobalObjectFunctions.h"
#include "Identifier.h"
#include "NodeInfo.h"
#include "Nodes.h"
#include "JSCInlines.h"
#include <wtf/dtoa.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/Assertions.h>
#include "KeywordLookup.h"
#include "Lexer.lut.h"
#include "Parser.h"
namespace JSC {
// Binds the keyword lookup helper to its owning VM and to the generated
// keyword hash table (JSC::mainTable).
Keywords::Keywords(VM& vm)
    : m_vm(vm)
    , m_keywordTable(JSC::mainTable)
{
}
// Lexical class assigned to a source character. The numeric order matters:
// isIdentPart() treats every value <= CharacterNumber as a character that
// may appear inside an identifier, so those three must stay first.
enum CharacterType {
    // Types that can be part of an identifier.
    CharacterIdentifierStart = 0,
    CharacterZero,
    CharacterNumber,

    // Everything else.
    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Operator characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Other types (only one so far).
    CharacterWhiteSpace,
    CharacterPrivateIdentifierStart
};
static const unsigned short typesOfLatin1Characters[256] = {
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterWhiteSpace,
CharacterLineTerminator,
CharacterWhiteSpace,
CharacterWhiteSpace,
CharacterLineTerminator,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterWhiteSpace,
CharacterExclamationMark,
CharacterQuote,
CharacterInvalid,
CharacterIdentifierStart,
CharacterModulo,
CharacterAnd,
CharacterQuote,
CharacterOpenParen,
CharacterCloseParen,
CharacterMultiply,
CharacterAdd,
CharacterComma,
CharacterSub,
CharacterDot,
CharacterSlash,
CharacterZero,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterColon,
CharacterSemicolon,
CharacterLess,
CharacterEqual,
CharacterGreater,
CharacterQuestion,
CharacterPrivateIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterOpenBracket,
CharacterBackSlash,
CharacterCloseBracket,
CharacterXor,
CharacterIdentifierStart,
CharacterInvalid,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterOpenBrace,
CharacterOr,
CharacterCloseBrace,
CharacterTilde,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterWhiteSpace,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterIdentifierStart,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterIdentifierStart,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterIdentifierStart,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterInvalid,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterInvalid,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart
};
// Lookup table used by singleEscape(): indexed by the ASCII code of the
// character that follows a backslash, it yields the character the escape
// denotes. A 0 entry means "not a single-character escape" (control
// characters, the digits, 'u', 'x' and DEL); most printable characters map
// to themselves, while b, f, n, r, t and v map to their control codes.
static const LChar singleCharacterEscapeValuesForASCII[128] = {
0, // 0 - start of control characters 0..31
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
' ', // 32 - start of punctuation 32..47
'!',
'"',
'#',
'$',
'%',
'&',
'\'',
'(',
')',
'*',
'+',
',',
'-',
'.',
'/',
0, // 48 - start of digits 0..9
0,
0,
0,
0,
0,
0,
0,
0,
0,
':', // 58
';',
'<',
'=',
'>',
'?',
'@',
'A', // 65 - start of A..Z
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z',
'[', // 91
'\\',
']',
'^',
'_',
'`',
'a', // 97 - start of a..z
0x08, // b - backspace
'c',
'd',
'e',
0x0C, // f - form feed
'g',
'h',
'i',
'j',
'k',
'l',
'm',
0x0A, // n - line feed
'o',
'p',
'q',
0x0D, // r - carriage return
's',
0x09, // t - horizontal tab
0, // u
0x0B, // v - vertical tab
'w',
0, // x
'y',
'z',
'{', // 123
'|',
'}',
'~',
0 // 127 - DEL
};
// Creates a lexer bound to |vm|. Builtin-function parsing mode is enabled
// when |strictness| is JSParseBuiltin. No source is attached here; see setCode().
template <typename T>
Lexer<T>::Lexer(VM* vm, JSParserStrictness strictness)
    : m_isReparsing(false)
    , m_vm(vm)
    , m_parsingBuiltinFunction(strictness == JSParseBuiltin)
{
}
// Nothing to release explicitly; members clean up after themselves.
template <typename T>
Lexer<T>::~Lexer()
{
}
// Builds the human-readable error message for the character the lexer is
// currently positioned on (m_current). A handful of characters get a
// hand-written spelling; everything else is reported as a \uXXXX escape.
template <typename T>
String Lexer<T>::invalidCharacterMessage() const
{
    switch (m_current) {
    case 0:
        return "Invalid character: '\\0'";
    case 10:
        return "Invalid character: '\\n'";
    case 11:
        return "Invalid character: '\\v'";
    case 13:
        return "Invalid character: '\\r'";
    case 35:
        return "Invalid character: '#'";
    case 64:
        return "Invalid character: '@'";
    case 96:
        return "Invalid character: '`'";
    default:
        // A \u escape is followed by four *hexadecimal* digits, so the code
        // unit must be printed in hex; printing it in decimal (as %04u did)
        // names a different character than the one that was rejected.
        return String::format("Invalid character '\\u%04x'", static_cast<unsigned>(m_current)).impl();
    }
}
// Returns the read cursor into the source buffer. The cursor never moves
// past m_codeEnd (checked in debug builds).
template <typename T>
ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
{
    const T* cursor = m_code;
    ASSERT(cursor <= m_codeEnd);
    return cursor;
}
// Attaches |source| to the lexer and resets all scanning state so that the
// next token is read from source.startOffset(). Identifiers created while
// lexing are allocated from |arena|'s identifier arena.
template <typename T>
void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
{
    m_arena = &arena->identifierArena();

    m_lineNumber = source.firstLine();
    m_lastToken = -1;

    // A null source string leaves the lexer with a null code start.
    const String& sourceString = source.provider()->source();
    if (sourceString.isNull())
        m_codeStart = 0;
    else
        setCodeStart(sourceString.impl());

    m_source = &source;
    m_sourceOffset = source.startOffset();

    // The lexed window is [startOffset, endOffset) of the provider's string.
    m_codeStartPlusOffset = m_codeStart + source.startOffset();
    m_code = m_codeStartPlusOffset;
    m_codeEnd = m_codeStart + source.endOffset();

    m_error = false;
    m_atLineStart = true;
    m_lineStart = m_code;
    m_lexErrorMessage = String();

    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);

    // Prime m_current; an empty window reads as the 0 sentinel.
    m_current = LIKELY(m_code < m_codeEnd) ? *m_code : 0;

    ASSERT(currentOffset() == source.startOffset());
}
// Advances the cursor by a compile-time constant number of characters and
// reloads m_current. Unlike shift(), this performs no end-of-input check
// before dereferencing the cursor.
template <typename T>
template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
{
    m_code += shiftAmount;
    ASSERT(currentOffset() >= currentLineStartOffset());
    m_current = *m_code;
}
// Advances the cursor by one character. m_current becomes the character
// under the cursor, or 0 once the cursor has reached m_codeEnd.
template <typename T>
ALWAYS_INLINE void Lexer<T>::shift()
{
    ++m_code;
    if (LIKELY(m_code < m_codeEnd)) {
        m_current = *m_code;
        return;
    }
    m_current = 0;
}
// True when the cursor sits exactly at m_codeEnd. A zero m_current alone is
// not sufficient: a literal NUL may occur inside the source.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::atEnd() const
{
    // A non-zero current character implies we are still inside the source.
    ASSERT(!m_current || m_code < m_codeEnd);
    return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
}
// Looks |offset| characters ahead of the cursor without consuming anything.
// Returns 0 when that position lies at or beyond m_codeEnd. Only offsets
// 1..4 are expected (checked in debug builds).
template <typename T>
ALWAYS_INLINE T Lexer<T>::peek(int offset) const
{
    ASSERT(offset > 0 && offset < 5);
    const T* lookahead = m_code + offset;
    if (lookahead < m_codeEnd)
        return *lookahead;
    return 0;
}
// Reads the four hex digits of a \uXXXX escape, starting at m_current.
// On success the four digits are consumed and their value is returned.
// On failure nothing is consumed and the result is tagged IncompleteHex
// when fewer than four characters remain past the cursor, InvalidHex otherwise.
template <typename T>
typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex()
{
    const T second = peek(1);
    const T third = peek(2);
    const T fourth = peek(3);

    const bool allHexDigits = isASCIIHexDigit(m_current) && isASCIIHexDigit(second) && isASCIIHexDigit(third) && isASCIIHexDigit(fourth);
    if (UNLIKELY(!allHexDigits)) {
        if ((m_code + 4) >= m_codeEnd)
            return UnicodeHexValue(UnicodeHexValue::IncompleteHex);
        return UnicodeHexValue(UnicodeHexValue::InvalidHex);
    }

    int codeUnit = convertUnicode(m_current, second, third, fourth);

    // Consume the four digits.
    for (int consumed = 0; consumed < 4; ++consumed)
        shift();

    return UnicodeHexValue(codeUnit);
}
// Consumes one line terminator at the cursor and bumps the line counter.
// A two-character CR/LF or LF/CR sequence counts as a single terminator.
template <typename T>
void Lexer<T>::shiftLineTerminator()
{
    ASSERT(isLineTerminator(m_current));

    m_positionBeforeLastNewline = currentPosition();

    const T first = m_current;
    shift();

    // The sum identifies "one is '\n', the other is '\r'" in either order.
    const bool isTwoCharacterTerminator = (first + m_current == '\n' + '\r');
    if (isTwoCharacterTerminator)
        shift();

    ++m_lineNumber;
}
// True when the previously lexed token was one of continue, break, return
// or throw.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
{
    switch (m_lastToken) {
    case CONTINUE:
    case BREAK:
    case RETURN:
    case THROW:
        return true;
    default:
        return false;
    }
}
// Identifier-start test for characters outside Latin-1: accepts anything
// whose Unicode general category is in the letter mask (U_GC_L_MASK).
static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
{
    return (U_GET_GC_MASK(c) & U_GC_L_MASK) != 0;
}
// An LChar is Latin-1 by construction.
static ALWAYS_INLINE bool isLatin1(LChar)
{
    return true;
}

// A UChar is Latin-1 when it fits in the 0..255 range.
static ALWAYS_INLINE bool isLatin1(UChar c)
{
    return c <= 0xFF;
}
// Latin-1 identifier-start test: a direct lookup in typesOfLatin1Characters.
static inline bool isIdentStart(LChar c)
{
    const unsigned short type = typesOfLatin1Characters[c];
    return type == CharacterIdentifierStart;
}

// 16-bit identifier-start test: table lookup for Latin-1 characters,
// Unicode category check for everything else.
static inline bool isIdentStart(UChar c)
{
    if (isLatin1(c))
        return isIdentStart(static_cast<LChar>(c));
    return isNonLatin1IdentStart(c);
}
// Identifier-part test for characters outside Latin-1: accepts the Unicode
// general categories L, Mn, Mc, Nd and Pc, plus U+200C and U+200D.
static NEVER_INLINE bool isNonLatin1IdentPart(int c)
{
    const bool hasIdentPartCategory = U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK);
    if (hasIdentPartCategory)
        return true;
    return c == 0x200C || c == 0x200D;
}
// Latin-1 identifier-part test. The CharacterType values are ordered so
// that every type allowed inside an identifier (identifier start, zero,
// number) compares <= CharacterNumber.
static ALWAYS_INLINE bool isIdentPart(LChar c)
{
    const unsigned short type = typesOfLatin1Characters[c];
    return type <= CharacterNumber;
}

// 16-bit identifier-part test: table lookup for Latin-1 characters,
// Unicode category check for everything else.
static ALWAYS_INLINE bool isIdentPart(UChar c)
{
    if (isLatin1(c))
        return isIdentPart(static_cast<LChar>(c));
    return isNonLatin1IdentPart(c);
}
// Returns the character denoted by the single-character escape "\c", or 0
// when |c| is not one (non-ASCII input, or a 0 entry in the escape table).
static inline LChar singleEscape(int c)
{
    if (!(c < 128))
        return 0;

    ASSERT(static_cast<size_t>(c) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII));
    return singleCharacterEscapeValuesForASCII[c];
}
// Appends one character to the 8-bit scratch buffer. |c| must fit in
// 0..0xFF (checked in debug builds).
template <typename T>
inline void Lexer<T>::record8(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= 0xFF);
    const LChar narrowed = static_cast<LChar>(c);
    m_buffer8.append(narrowed);
}
// Debug-only check that |c| can be stored in an LChar without loss.
// Generic version: checks both bounds.
template <typename T>
inline void assertCharIsIn8BitRange(T c)
{
    UNUSED_PARAM(c);
    ASSERT(c >= 0);
    ASSERT(c <= 0xFF);
}

// UChar version: only the upper bound is checked.
template <>
inline void assertCharIsIn8BitRange(UChar c)
{
    UNUSED_PARAM(c);
    ASSERT(c <= 0xFF);
}

// LChar version: always in range, nothing to check.
template <>
inline void assertCharIsIn8BitRange(LChar)
{
}
// Appends |length| source characters starting at |p| to the 8-bit scratch
// buffer, narrowing each one to LChar. Every character is expected to fit
// in 8 bits (checked in debug builds).
template <typename T>
inline void Lexer<T>::append8(const T* p, size_t length)
{
    const size_t oldSize = m_buffer8.size();
    m_buffer8.grow(oldSize + length);

    LChar* destination = m_buffer8.data() + oldSize;
    const T* const end = p + length;
    for (const T* source = p; source != end; ++source, ++destination) {
        const T character = *source;
        assertCharIsIn8BitRange(character);
        *destination = character;
    }
}
// Appends |length| 8-bit characters starting at |p| to the 16-bit scratch
// buffer, widening each one to UChar.
template <typename T>
inline void Lexer<T>::append16(const LChar* p, size_t length)
{
    const size_t oldSize = m_buffer16.size();
    m_buffer16.grow(oldSize + length);

    UChar* destination = m_buffer16.data() + oldSize;
    const LChar* const end = p + length;
    for (const LChar* source = p; source != end; ++source, ++destination)
        *destination = *source;
}
// Appends one source character to the 16-bit scratch buffer.
template <typename T>
inline void Lexer<T>::record16(T c)
{
    m_buffer16.append(c);
}

// Appends one code unit given as an int to the 16-bit scratch buffer.
// |c| must fit in 0..USHRT_MAX (checked in debug builds).
template <typename T>
inline void Lexer<T>::record16(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= static_cast<int>(USHRT_MAX));
    const UChar codeUnit = static_cast<UChar>(c);
    m_buffer16.append(codeUnit);
}
#if !ASSERT_DISABLED
// Returns false for the identifiers that builtin function source is not
// allowed to use directly: the public "call" and "apply" names, "eval" and
// "Function". A null identifier is considered safe. Only compiled when
// assertions are enabled.
bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
{
    if (!ident)
        return true;

    const bool isDisallowed = *ident == vm.propertyNames->builtinNames().callPublicName()
        || *ident == vm.propertyNames->builtinNames().applyPublicName()
        || *ident == vm.propertyNames->eval
        || *ident == vm.propertyNames->Function;
    return !isDisallowed;
}
#endif
// Lexes an identifier or keyword from 8-bit source, starting at m_current.
// When |shouldCreateIdentifier| is set (or a builtin function is being
// parsed) the resulting Identifier is stored in tokenData->ident; otherwise
// tokenData->ident is cleared. Returns the keyword token when the text is a
// reserved word (unless LexerFlagsIgnoreReservedWords is set), IDENT for an
// ordinary identifier, or an error token.
template <>
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
    // Characters left in the source, measured before anything is consumed.
    const ptrdiff_t remaining = m_codeEnd - m_code;
    const bool reservedWordsMatter = !(lexerFlags & LexerFlagsIgnoreReservedWords);

    // With at least maxTokenLength characters left, try parseKeyword() first.
    if (remaining >= maxTokenLength && reservedWordsMatter) {
        JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
        if (keyword != IDENT) {
            ASSERT((!shouldCreateIdentifier) || tokenData->ident);
            if (keyword == RESERVED_IF_STRICT && !strictMode)
                return IDENT;
            return keyword;
        }
    }

    // In builtin functions a leading '@' marks a private name.
    const bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
    if (isPrivateName)
        shift();

    const LChar* identifierStart = currentSourcePtr();
    unsigned identifierLineStart = currentLineStartOffset();

    while (isIdentPart(m_current))
        shift();

    // A backslash may introduce a \u escape: restart in the slow path.
    if (UNLIKELY(m_current == '\\')) {
        setOffsetFromSourcePtr(identifierStart, identifierLineStart);
        return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
    }

    const Identifier* ident = 0;
    if (!shouldCreateIdentifier && !m_parsingBuiltinFunction)
        tokenData->ident = 0;
    else {
        int identifierLength = currentSourcePtr() - identifierStart;
        ident = makeIdentifier(identifierStart, identifierLength);

        if (m_parsingBuiltinFunction) {
            if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
                m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
                return ERRORTOK;
            }
            if (isPrivateName)
                ident = m_vm->propertyNames->getPrivateName(*ident);
            else if (*ident == m_vm->propertyNames->undefinedKeyword) {
                // NOTE(review): this store is overwritten by the assignment
                // to tokenData->ident below, so it currently has no effect.
                // Confirm whether |ident| itself was meant to be replaced.
                tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
            }
            if (!ident)
                return INVALID_PRIVATE_NAME_ERRORTOK;
        }
        tokenData->ident = ident;
    }

    // The keyword attempt above was skipped near the end of the source, so
    // look the identifier up in the keyword table instead.
    if (UNLIKELY(remaining < maxTokenLength && reservedWordsMatter) && !isPrivateName) {
        ASSERT(shouldCreateIdentifier);
        const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
        if (!entry)
            return IDENT;
        JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
        if (token == RESERVED_IF_STRICT && !strictMode)
            return IDENT;
        return token;
    }

    return IDENT;
}
// Lexes an identifier or keyword from 16-bit source, starting at m_current.
// Same contract as the 8-bit specialization; additionally, when every
// character of the identifier fits in 8 bits the Identifier is built
// through makeIdentifierLCharFromUChar().
template <>
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
    // Characters left in the source, measured before anything is consumed.
    const ptrdiff_t remaining = m_codeEnd - m_code;
    const bool reservedWordsMatter = !(lexerFlags & LexerFlagsIgnoreReservedWords);

    // With at least maxTokenLength characters left, try parseKeyword() first.
    if (remaining >= maxTokenLength && reservedWordsMatter) {
        JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
        if (keyword != IDENT) {
            ASSERT((!shouldCreateIdentifier) || tokenData->ident);
            if (keyword == RESERVED_IF_STRICT && !strictMode)
                return IDENT;
            return keyword;
        }
    }

    // In builtin functions a leading '@' marks a private name.
    const bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
    if (isPrivateName)
        shift();

    const UChar* identifierStart = currentSourcePtr();
    int identifierLineStart = currentLineStartOffset();

    // OR all characters together so one mask test tells whether any of
    // them needs more than 8 bits.
    UChar orAllChars = 0;
    while (isIdentPart(m_current)) {
        orAllChars |= m_current;
        shift();
    }

    // A backslash may introduce a \u escape: restart in the slow path.
    if (UNLIKELY(m_current == '\\')) {
        ASSERT(!isPrivateName);
        setOffsetFromSourcePtr(identifierStart, identifierLineStart);
        return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
    }

    const bool isAll8Bit = !(orAllChars & ~0xff);

    const Identifier* ident = 0;
    if (!shouldCreateIdentifier && !m_parsingBuiltinFunction)
        tokenData->ident = 0;
    else {
        int identifierLength = currentSourcePtr() - identifierStart;
        if (isAll8Bit)
            ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
        else
            ident = makeIdentifier(identifierStart, identifierLength);

        if (m_parsingBuiltinFunction) {
            if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
                m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
                return ERRORTOK;
            }
            if (isPrivateName)
                ident = m_vm->propertyNames->getPrivateName(*ident);
            else if (*ident == m_vm->propertyNames->undefinedKeyword) {
                // NOTE(review): this store is overwritten by the assignment
                // to tokenData->ident below, so it currently has no effect.
                // Confirm whether |ident| itself was meant to be replaced.
                tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
            }
            if (!ident)
                return INVALID_PRIVATE_NAME_ERRORTOK;
        }
        tokenData->ident = ident;
    }

    // The keyword attempt above was skipped near the end of the source, so
    // look the identifier up in the keyword table instead.
    if (UNLIKELY(remaining < maxTokenLength && reservedWordsMatter) && !isPrivateName) {
        ASSERT(shouldCreateIdentifier);
        const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
        if (!entry)
            return IDENT;
        JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
        if (token == RESERVED_IF_STRICT && !strictMode)
            return IDENT;
        return token;
    }

    return IDENT;
}
// Slow path for identifiers that may contain \uXXXX escapes. Scans from the
// current position, copying literal runs and decoded escapes into
// m_buffer16, and builds the Identifier from that buffer when an escape was
// seen. Returns IDENT, a keyword token (only when no escape was
// encountered and reserved words are not ignored), or an error token for a
// malformed or unterminated escape.
template <typename T>
template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
// Characters left in the source on entry to this function.
const ptrdiff_t remaining = m_codeEnd - m_code;
// Start of the literal run that has not yet been copied to m_buffer16.
const T* identifierStart = currentSourcePtr();
// Becomes true once a backslash is seen; from then on the identifier text
// lives in m_buffer16 rather than in the source.
bool bufferRequired = false;
while (true) {
if (LIKELY(isIdentPart(m_current))) {
shift();
continue;
}
if (LIKELY(m_current != '\\'))
break;
bufferRequired = true;
// Flush the literal characters preceding the backslash.
if (identifierStart != currentSourcePtr())
m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
shift();
// Only \u escapes are accepted inside identifiers.
if (UNLIKELY(m_current != 'u'))
return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
shift();
UnicodeHexValue character = parseFourDigitUnicodeHex();
if (UNLIKELY(!character.isValid()))
return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
UChar ucharacter = static_cast<UChar>(character.value());
// An empty buffer means the escape is the first character, which must be a
// valid identifier start; otherwise it only has to be a valid identifier part.
// NOTE(review): the buffer is only filled by record16() below when
// shouldCreateIdentifier is true, so this emptiness test may differ between
// the two template instantiations — confirm this is intended.
if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
if (shouldCreateIdentifier)
record16(ucharacter);
// The next literal run starts right after the escape.
identifierStart = currentSourcePtr();
}
int identifierLength;
const Identifier* ident = 0;
if (shouldCreateIdentifier) {
if (!bufferRequired) {
// No escape seen: build the identifier straight from the source.
identifierLength = currentSourcePtr() - identifierStart;
ident = makeIdentifier(identifierStart, identifierLength);
} else {
// Flush the trailing literal run, then build from the buffer.
if (identifierStart != currentSourcePtr())
m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
}
tokenData->ident = ident;
} else
tokenData->ident = 0;
// Keyword lookup happens only for identifiers written without escapes.
if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
ASSERT(shouldCreateIdentifier);
if (remaining < maxTokenLength) {
const HashTableValue* entry = m_vm->keywords->getKeyword(*ident);
ASSERT((remaining < maxTokenLength) || !entry);
if (!entry)
return IDENT;
JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
// RESERVED_IF_STRICT words are plain identifiers outside strict mode.
return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
}
return IDENT;
}
// Leave the scratch buffer empty for the next token.
m_buffer16.resize(0);
return IDENT;
}
// 8-bit source: only characters below 0x0E force the string slow path.
static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
{
    return character < 0xE;
}

// 16-bit source: characters below 0x0E and characters that do not fit in
// 8 bits force the string slow path.
static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
{
    if (character < 0xE)
        return true;
    return character > 0xFF;
}
// Fast path for string literals whose content fits in 8 bits. m_current is
// the opening quote on entry. Text is accumulated in m_buffer8 (only when
// shouldBuildStrings is set). Handles single-character escapes, escaped
// line terminators and \xHH; on any other escape, or on a character for
// which characterRequiresParseStringSlowCase() is true, it rewinds to the
// start of the literal and delegates to parseStringSlowCase().
template <typename T>
template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
{
// Remember where the literal starts so the slow path can restart from there.
int startingOffset = currentOffset();
int startingLineStartOffset = currentLineStartOffset();
int startingLineNumber = lineNumber();
T stringQuoteCharacter = m_current;
shift();
// Start of the literal run that has not yet been copied to m_buffer8.
const T* stringStart = currentSourcePtr();
while (m_current != stringQuoteCharacter) {
if (UNLIKELY(m_current == '\\')) {
// Flush the literal characters preceding the backslash.
if (stringStart != currentSourcePtr() && shouldBuildStrings)
append8(stringStart, currentSourcePtr() - stringStart);
shift();
LChar escape = singleEscape(m_current);
if (escape) {
if (shouldBuildStrings)
record8(escape);
shift();
} else if (UNLIKELY(isLineTerminator(m_current)))
// Backslash followed by a line terminator: consumed, nothing recorded.
shiftLineTerminator();
else if (m_current == 'x') {
shift();
if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
// Report "unterminated" when the source simply ended inside the escape.
return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
}
T prev = m_current;
shift();
if (shouldBuildStrings)
record8(convertHex(prev, m_current));
shift();
} else {
// Any other escape: rewind and let the slow path handle the whole literal.
setOffset(startingOffset, startingLineStartOffset);
setLineNumber(startingLineNumber);
m_buffer8.resize(0);
return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
}
stringStart = currentSourcePtr();
continue;
}
if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
// Rewind and let the slow path handle the whole literal.
setOffset(startingOffset, startingLineStartOffset);
setLineNumber(startingLineNumber);
m_buffer8.resize(0);
return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
}
shift();
}
// Flush the trailing literal run (the closing quote is not consumed here).
if (currentSourcePtr() != stringStart && shouldBuildStrings)
append8(stringStart, currentSourcePtr() - stringStart);
if (shouldBuildStrings) {
tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
m_buffer8.resize(0);
} else
tokenData->ident = 0;
return StringParsedSuccessfully;
}
// General string-literal parser. m_current is the opening quote on entry.
// Text is accumulated in m_buffer16 (only when shouldBuildStrings is set).
// Handles single-character escapes, escaped line terminators, \xHH, \uXXXX
// and numeric escapes (only \0 in strict mode, octal otherwise). Returns
// StringParsedSuccessfully, StringUnterminated or StringCannotBeParsed and
// sets m_lexErrorMessage on failure.
template <typename T>
template <bool shouldBuildStrings> typename Lexer<T>::StringParseResult Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
{
T stringQuoteCharacter = m_current;
shift();
// Start of the literal run that has not yet been copied to m_buffer16.
const T* stringStart = currentSourcePtr();
while (m_current != stringQuoteCharacter) {
if (UNLIKELY(m_current == '\\')) {
// Flush the literal characters preceding the backslash.
if (stringStart != currentSourcePtr() && shouldBuildStrings)
append16(stringStart, currentSourcePtr() - stringStart);
shift();
LChar escape = singleEscape(m_current);
if (escape) {
if (shouldBuildStrings)
record16(escape);
shift();
} else if (UNLIKELY(isLineTerminator(m_current)))
// Backslash followed by a line terminator: consumed, nothing recorded.
shiftLineTerminator();
else if (m_current == 'x') {
shift();
if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
return StringCannotBeParsed;
}
T prev = m_current;
shift();
if (shouldBuildStrings)
record16(convertHex(prev, m_current));
shift();
} else if (m_current == 'u') {
shift();
UnicodeHexValue character = parseFourDigitUnicodeHex();
if (character.isValid()) {
if (shouldBuildStrings)
record16(character.value());
} else if (m_current == stringQuoteCharacter) {
// "\u" immediately followed by the closing quote yields a literal 'u'.
if (shouldBuildStrings)
record16('u');
} else {
m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed;
}
} else if (strictMode && isASCIIDigit(m_current)) {
// Strict mode: the only numeric escape accepted is \0 not followed by a digit.
int character1 = m_current;
shift();
if (character1 != '0' || isASCIIDigit(m_current)) {
m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
return StringCannotBeParsed;
}
if (shouldBuildStrings)
record16(0);
} else if (!strictMode && isASCIIOctalDigit(m_current)) {
// Non-strict mode: octal escape of one, two or three digits. A third
// digit is only taken when the first digit is in '0'..'3'.
T character1 = m_current;
shift();
if (isASCIIOctalDigit(m_current)) {
T character2 = m_current;
shift();
if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
if (shouldBuildStrings)
record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
shift();
} else {
if (shouldBuildStrings)
record16((character1 - '0') * 8 + character2 - '0');
}
} else {
if (shouldBuildStrings)
record16(character1 - '0');
}
} else if (!atEnd()) {
// Any other escaped character stands for itself.
if (shouldBuildStrings)
record16(m_current);
shift();
} else {
m_lexErrorMessage = "Unterminated string constant";
return StringUnterminated;
}
stringStart = currentSourcePtr();
continue;
}
// Cheap pre-filter: the unsigned subtraction wraps for characters below
// 0x0E, setting the 0x2000 bit for them as well as for other values with
// that bit set after the subtraction; only those take the precise
// end-of-input / line-terminator check.
if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
if (atEnd() || isLineTerminator(m_current)) {
m_lexErrorMessage = "Unexpected EOF";
return atEnd() ? StringUnterminated : StringCannotBeParsed;
}
}
shift();
}
// Flush the trailing literal run (the closing quote is not consumed here).
if (currentSourcePtr() != stringStart && shouldBuildStrings)
append16(stringStart, currentSourcePtr() - stringStart);
if (shouldBuildStrings)
tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
else
tokenData->ident = 0;
// Leave the scratch buffer empty for the next token.
m_buffer16.resize(0);
return StringParsedSuccessfully;
}
// Parses the digits of a hexadecimal literal into |returnValue|. The first
// shift() skips the character under the cursor (the 'x'/'X' in lex()).
// Up to eight digits are accumulated in a 32-bit integer; when the
// accumulator fills up, the digits are replayed into m_buffer8 and the
// value is computed with parseIntOverflow(). The caller is responsible for
// clearing m_buffer8 afterwards.
template <typename T>
ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
{
    uint32_t accumulator = 0;
    int digitsLeft = 7;

    shift();

    do {
        accumulator = (accumulator << 4) + toASCIIHexValue(m_current);
        shift();
        --digitsLeft;
    } while (isASCIIHexDigit(m_current) && digitsLeft >= 0);

    // Fast path: the accumulator did not fill up.
    if (digitsLeft >= 0) {
        returnValue = accumulator;
        return;
    }

    // Replay the eight accumulated nibbles, most significant first, as
    // lowercase hex digits.
    for (int nibble = 0; nibble < 8; ++nibble) {
        const int digit = accumulator >> 28;
        record8(digit < 10 ? digit + '0' : digit - 10 + 'a');
        accumulator <<= 4;
    }

    // Then append whatever hex digits remain in the source.
    for (; isASCIIHexDigit(m_current); shift())
        record8(m_current);

    returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
}
// Parses a run of octal digits starting at m_current into |returnValue|.
// Returns false when the run is followed by a decimal digit, in which case
// the consumed digits have been recorded in m_buffer8. Up to ten digits
// are accumulated in a 32-bit integer; longer runs go through
// parseIntOverflow().
template <typename T>
ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
{
    uint32_t accumulator = 0;
    int slot = 9;
    // Consumed digits, stored from index 9 downwards so they can be
    // replayed into m_buffer8 in source order.
    LChar consumed[10];

    do {
        accumulator = accumulator * 8 + (m_current - '0');
        consumed[slot] = m_current;
        shift();
        --slot;
    } while (isASCIIOctalDigit(m_current) && slot >= 0);

    // Fast path: the run ended and the accumulator did not fill up.
    const bool fitsInAccumulator = !isASCIIDigit(m_current) && slot >= 0;
    if (fitsInAccumulator) {
        returnValue = accumulator;
        return true;
    }

    for (int index = 9; index > slot; --index)
        record8(consumed[index]);

    for (; isASCIIOctalDigit(m_current); shift())
        record8(m_current);

    // An 8 or 9 here means this is not an octal literal after all.
    if (isASCIIDigit(m_current))
        return false;

    returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
    return true;
}
// Parses the integer part of a decimal literal. Returns true, with the
// value in |returnValue|, when the literal is a short integer not followed
// by '.' or an exponent. Otherwise returns false and leaves the digits
// seen so far in m_buffer8 for the caller to finish. When m_buffer8
// already holds characters on entry, the fast path is skipped.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
{
    if (!m_buffer8.size()) {
        uint32_t accumulator = 0;
        int slot = 9;
        // Consumed digits, stored from index 9 downwards so they can be
        // replayed into m_buffer8 in source order.
        LChar consumed[10];

        do {
            accumulator = accumulator * 10 + (m_current - '0');
            consumed[slot] = m_current;
            shift();
            --slot;
        } while (isASCIIDigit(m_current) && slot >= 0);

        // (m_current | 0x20) folds 'E' onto 'e'.
        const bool isPlainShortInteger = slot >= 0 && m_current != '.' && (m_current | 0x20) != 'e';
        if (isPlainShortInteger) {
            returnValue = accumulator;
            return true;
        }

        for (int index = 9; index > slot; --index)
            record8(consumed[index]);
    }

    for (; isASCIIDigit(m_current); shift())
        record8(m_current);

    return false;
}
// Records a '.' followed by every decimal digit at the cursor into
// m_buffer8, consuming the digits.
template <typename T>
ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
{
    record8('.');
    for (; isASCIIDigit(m_current); shift())
        record8(m_current);
}
// Records the exponent of a numeric literal into m_buffer8: an 'e', an
// optional sign and at least one digit. The first shift() skips the
// character under the cursor (the exponent indicator). Returns false when
// no digit follows.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
{
    record8('e');
    shift();

    const bool hasSign = m_current == '+' || m_current == '-';
    if (hasSign) {
        record8(m_current);
        shift();
    }

    if (!isASCIIDigit(m_current))
        return false;

    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift();
    }
    return true;
}
// Skips the body of a /* ... */ comment. Returns true once the closing
// "*/" has been consumed, false when the input ends first. Sets
// m_terminator when the comment spans a line terminator.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
{
    for (;;) {
        // A run of '*' may end the comment if it is followed by '/'.
        while (UNLIKELY(m_current == '*')) {
            shift();
            if (m_current == '/') {
                shift();
                return true;
            }
        }

        if (atEnd())
            return false;

        if (!isLineTerminator(m_current)) {
            shift();
            continue;
        }

        shiftLineTerminator();
        m_terminator = true;
    }
}
// Looks ahead, without consuming anything, past white space and line
// terminators and reports whether the next character is ':'.
template <typename T>
bool Lexer<T>::nextTokenIsColon()
{
    const T* cursor = m_code;
    for (; cursor < m_codeEnd; ++cursor) {
        if (!isWhiteSpace(*cursor) && !isLineTerminator(*cursor))
            break;
    }
    return cursor < m_codeEnd && *cursor == ':';
}
template <typename T>
JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
{
JSTokenData* tokenData = &tokenRecord->m_data;
JSTokenLocation* tokenLocation = &tokenRecord->m_location;
ASSERT(!m_error);
ASSERT(m_buffer8.isEmpty());
ASSERT(m_buffer16.isEmpty());
JSTokenType token = ERRORTOK;
m_terminator = false;
start:
while (isWhiteSpace(m_current))
shift();
if (atEnd())
return EOFTOK;
tokenLocation->startOffset = currentOffset();
ASSERT(currentOffset() >= currentLineStartOffset());
tokenRecord->m_startPosition = currentPosition();
CharacterType type;
if (LIKELY(isLatin1(m_current)))
type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
else if (isNonLatin1IdentStart(m_current))
type = CharacterIdentifierStart;
else if (isLineTerminator(m_current))
type = CharacterLineTerminator;
else
type = CharacterInvalid;
switch (type) {
case CharacterGreater:
shift();
if (m_current == '>') {
shift();
if (m_current == '>') {
shift();
if (m_current == '=') {
shift();
token = URSHIFTEQUAL;
break;
}
token = URSHIFT;
break;
}
if (m_current == '=') {
shift();
token = RSHIFTEQUAL;
break;
}
token = RSHIFT;
break;
}
if (m_current == '=') {
shift();
token = GE;
break;
}
token = GT;
break;
case CharacterEqual:
shift();
if (m_current == '=') {
shift();
if (m_current == '=') {
shift();
token = STREQ;
break;
}
token = EQEQ;
break;
}
token = EQUAL;
break;
case CharacterLess:
shift();
if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
goto inSingleLineComment;
}
if (m_current == '<') {
shift();
if (m_current == '=') {
shift();
token = LSHIFTEQUAL;
break;
}
token = LSHIFT;
break;
}
if (m_current == '=') {
shift();
token = LE;
break;
}
token = LT;
break;
case CharacterExclamationMark:
shift();
if (m_current == '=') {
shift();
if (m_current == '=') {
shift();
token = STRNEQ;
break;
}
token = NE;
break;
}
token = EXCLAMATION;
break;
case CharacterAdd:
shift();
if (m_current == '+') {
shift();
token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
break;
}
if (m_current == '=') {
shift();
token = PLUSEQUAL;
break;
}
token = PLUS;
break;
case CharacterSub:
shift();
if (m_current == '-') {
shift();
if (m_atLineStart && m_current == '>') {
shift();
goto inSingleLineComment;
}
token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
break;
}
if (m_current == '=') {
shift();
token = MINUSEQUAL;
break;
}
token = MINUS;
break;
case CharacterMultiply:
shift();
if (m_current == '=') {
shift();
token = MULTEQUAL;
break;
}
token = TIMES;
break;
case CharacterSlash:
shift();
if (m_current == '/') {
shift();
goto inSingleLineComment;
}
if (m_current == '*') {
shift();
if (parseMultilineComment())
goto start;
m_lexErrorMessage = "Multiline comment was not closed properly";
token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
goto returnError;
}
if (m_current == '=') {
shift();
token = DIVEQUAL;
break;
}
token = DIVIDE;
break;
case CharacterAnd:
shift();
if (m_current == '&') {
shift();
token = AND;
break;
}
if (m_current == '=') {
shift();
token = ANDEQUAL;
break;
}
token = BITAND;
break;
case CharacterXor:
shift();
if (m_current == '=') {
shift();
token = XOREQUAL;
break;
}
token = BITXOR;
break;
case CharacterModulo:
shift();
if (m_current == '=') {
shift();
token = MODEQUAL;
break;
}
token = MOD;
break;
case CharacterOr:
shift();
if (m_current == '=') {
shift();
token = OREQUAL;
break;
}
if (m_current == '|') {
shift();
token = OR;
break;
}
token = BITOR;
break;
case CharacterOpenParen:
token = OPENPAREN;
shift();
break;
case CharacterCloseParen:
token = CLOSEPAREN;
shift();
break;
case CharacterOpenBracket:
token = OPENBRACKET;
shift();
break;
case CharacterCloseBracket:
token = CLOSEBRACKET;
shift();
break;
case CharacterComma:
token = COMMA;
shift();
break;
case CharacterColon:
token = COLON;
shift();
break;
case CharacterQuestion:
token = QUESTION;
shift();
break;
case CharacterTilde:
token = TILDE;
shift();
break;
case CharacterSemicolon:
shift();
token = SEMICOLON;
break;
case CharacterOpenBrace:
tokenData->line = lineNumber();
tokenData->offset = currentOffset();
tokenData->lineStartOffset = currentLineStartOffset();
ASSERT(tokenData->offset >= tokenData->lineStartOffset);
shift();
token = OPENBRACE;
break;
case CharacterCloseBrace:
tokenData->line = lineNumber();
tokenData->offset = currentOffset();
tokenData->lineStartOffset = currentLineStartOffset();
ASSERT(tokenData->offset >= tokenData->lineStartOffset);
shift();
token = CLOSEBRACE;
break;
case CharacterDot:
shift();
if (!isASCIIDigit(m_current)) {
if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
shift();
shift();
token = DOTDOTDOT;
break;
}
token = DOT;
break;
}
goto inNumberAfterDecimalPoint;
case CharacterZero:
shift();
if ((m_current | 0x20) == 'x') {
if (!isASCIIHexDigit(peek(1))) {
m_lexErrorMessage = "No hexadecimal digits after '0x'";
token = INVALID_HEX_NUMBER_ERRORTOK;
goto returnError;
}
parseHex(tokenData->doubleValue);
if (isIdentStart(m_current)) {
m_lexErrorMessage = "No space between hexadecimal literal and identifier";
token = INVALID_HEX_NUMBER_ERRORTOK;
goto returnError;
}
token = NUMBER;
m_buffer8.resize(0);
break;
}
record8('0');
if (isASCIIOctalDigit(m_current)) {
if (parseOctal(tokenData->doubleValue)) {
if (strictMode) {
m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
token = INVALID_OCTAL_NUMBER_ERRORTOK;
goto returnError;
}
token = NUMBER;
}
}
FALLTHROUGH;
case CharacterNumber:
if (LIKELY(token != NUMBER)) {
if (!parseDecimal(tokenData->doubleValue)) {
if (m_current == '.') {
shift();
inNumberAfterDecimalPoint:
parseNumberAfterDecimalPoint();
}
if ((m_current | 0x20) == 'e') {
if (!parseNumberAfterExponentIndicator()) {
m_lexErrorMessage = "Non-number found after exponent indicator";
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
goto returnError;
}
}
size_t parsedLength;
tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
}
token = NUMBER;
}
if (UNLIKELY(isIdentStart(m_current))) {
m_lexErrorMessage = "At least one digit must occur after a decimal point";
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
goto returnError;
}
m_buffer8.resize(0);
break;
case CharacterQuote:
if (lexerFlags & LexerFlagsDontBuildStrings) {
StringParseResult result = parseString<false>(tokenData, strictMode);
if (UNLIKELY(result != StringParsedSuccessfully)) {
token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
goto returnError;
}
} else {
StringParseResult result = parseString<true>(tokenData, strictMode);
if (UNLIKELY(result != StringParsedSuccessfully)) {
token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
goto returnError;
}
}
shift();
token = STRING;
break;
case CharacterIdentifierStart:
ASSERT(isIdentStart(m_current));
FALLTHROUGH;
case CharacterBackSlash:
parseIdent:
if (lexerFlags & LexexFlagsDontBuildKeywords)
token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
else
token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
break;
case CharacterLineTerminator:
ASSERT(isLineTerminator(m_current));
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
m_lineStart = m_code;
goto start;
case CharacterPrivateIdentifierStart:
if (m_parsingBuiltinFunction)
goto parseIdent;
FALLTHROUGH;
case CharacterInvalid:
m_lexErrorMessage = invalidCharacterMessage();
token = ERRORTOK;
goto returnError;
default:
RELEASE_ASSERT_NOT_REACHED();
m_lexErrorMessage = "Internal Error";
token = ERRORTOK;
goto returnError;
}
m_atLineStart = false;
goto returnToken;
inSingleLineComment:
while (!isLineTerminator(m_current)) {
if (atEnd())
return EOFTOK;
shift();
}
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
m_lineStart = m_code;
if (!lastTokenWasRestrKeyword())
goto start;
token = SEMICOLON;
returnToken:
tokenLocation->line = m_lineNumber;
tokenLocation->endOffset = currentOffset();
tokenLocation->lineStartOffset = currentLineStartOffset();
ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
tokenRecord->m_endPosition = currentPosition();
m_lastToken = token;
return token;
returnError:
m_error = true;
tokenLocation->line = m_lineNumber;
tokenLocation->endOffset = currentOffset();
tokenLocation->lineStartOffset = currentLineStartOffset();
ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
tokenRecord->m_endPosition = currentPosition();
RELEASE_ASSERT(token & ErrorTokenFlag);
return token;
}
// Folds a character into a running bitwise OR of all characters seen so far.
// The primary template is only declared; behavior is supplied by the two
// explicit specializations below, selected by the lexer's character type.
template <typename T>
static inline void orCharacter(UChar&, UChar);

// LChar specialization: intentionally does nothing, leaving the accumulator
// untouched.
template <>
inline void orCharacter<LChar>(UChar&, UChar)
{
}

// UChar specialization: merges the bits of |nextCharacter| into
// |accumulatedBits|.
template <>
inline void orCharacter<UChar>(UChar& accumulatedBits, UChar nextCharacter)
{
    accumulatedBits = static_cast<UChar>(accumulatedBits | nextCharacter);
}
// Scans a regular expression literal starting at the current character and
// produces identifiers for its pattern and its flags.
//
// pattern       - receives the identifier built from the characters up to
//                 (but not including) the terminating '/'.
// flags         - receives the identifier built from the identifier-part
//                 characters that directly follow the terminating '/'.
// patternPrefix - when non-zero, recorded as the first character of the
//                 pattern before anything is read from the input.
//
// Returns false (after emptying m_buffer16) if a line terminator or the end
// of input is reached before the pattern is terminated; returns true
// otherwise. m_buffer16 is expected to be empty on entry and is empty again
// on exit.
template <typename T>
bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
{
    ASSERT(m_buffer16.isEmpty());

    if (patternPrefix) {
        ASSERT(!isLineTerminator(patternPrefix));
        ASSERT(patternPrefix != '/');
        ASSERT(patternPrefix != '[');
        record16(patternPrefix);
    }

    // Bitwise OR of every pattern character read from the input; handed to
    // makeRightSizedIdentifier() together with the buffered characters.
    UChar patternBits = 0;
    bool escaped = false;
    bool insideClass = false;

    for (;;) {
        if (isLineTerminator(m_current) || atEnd()) {
            m_buffer16.resize(0);
            return false;
        }

        const T character = m_current;
        shift();

        // A '/' ends the pattern unless it is escaped or sits inside [...].
        if (character == '/' && !(escaped || insideClass))
            break;

        record16(character);
        orCharacter<T>(patternBits, character);

        if (escaped) {
            // The character following a backslash is taken literally.
            escaped = false;
            continue;
        }

        if (character == '[')
            insideClass = true;
        else if (character == ']')
            insideClass = false;
        else if (character == '\\')
            escaped = true;
    }

    pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), patternBits);
    m_buffer16.resize(0);

    // The flags are accumulated separately, starting from a clean buffer.
    UChar flagBits = 0;
    for (; isIdentPart(m_current); shift()) {
        record16(m_current);
        orCharacter<T>(flagBits, m_current);
    }

    flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), flagBits);
    m_buffer16.resize(0);
    return true;
}
// Advances past a regular expression literal (pattern and trailing flags)
// without recording any of its characters.
//
// Returns false if a line terminator or the end of input is reached before
// the pattern is terminated by an unescaped '/' outside of [...]; returns
// true once the pattern and any following identifier-part characters have
// been consumed.
template <typename T>
bool Lexer<T>::skipRegExp()
{
    bool escaped = false;
    bool insideClass = false;

    for (;;) {
        if (isLineTerminator(m_current) || atEnd())
            return false;

        const T character = m_current;
        shift();

        if (escaped) {
            // The character after a backslash never terminates the pattern
            // and never opens or closes a bracketed class.
            escaped = false;
            continue;
        }

        if (character == '/' && !insideClass)
            break;

        if (character == '[')
            insideClass = true;
        else if (character == ']')
            insideClass = false;
        else if (character == '\\')
            escaped = true;
    }

    // Consume the flags that follow the closing '/'.
    for (; isIdentPart(m_current); shift()) { }

    return true;
}
// Resets the lexer's transient state: clears the arena pointer and the
// reparsing flag, and replaces both scratch buffers with freshly constructed
// empty vectors (swapping rather than resizing, so the previous contents are
// handed to temporaries that are destroyed here).
template <typename T>
void Lexer<T>::clear()
{
    m_arena = 0;
    m_isReparsing = false;

    {
        Vector<LChar> emptyBuffer8;
        m_buffer8.swap(emptyBuffer8);
    }
    {
        Vector<UChar> emptyBuffer16;
        m_buffer16.swap(emptyBuffer16);
    }
}
// Explicitly instantiate the lexer for both character types (LChar and UChar)
// so the member templates defined in this file are emitted here.
template class Lexer<LChar>;
template class Lexer<UChar>;
}