#include "config.h"
#include "Lexer.h"
#include "BuiltinNames.h"
#include "Identifier.h"
#include "JSCInlines.h"
#include "JSFunctionInlines.h"
#include "KeywordLookup.h"
#include "Lexer.lut.h"
#include "Nodes.h"
#include "ParseInt.h"
#include "Parser.h"
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/Assertions.h>
#include <wtf/Variant.h>
#include <wtf/dtoa.h>
namespace JSC {
// Reports whether |identifier| has an entry in the lexer's main keyword table.
bool isLexerKeyword(const Identifier& identifier)
{
    const auto* keywordEntry = JSC::mainTable.entry(identifier);
    return keywordEntry != nullptr;
}
// Lexical class of a Latin-1 character; these are the values stored in
// typesOfLatin1Characters.
// CharacterIdentifierStart, CharacterZero and CharacterNumber must remain the
// first three enumerators, in this order: isIdentPart(LChar) recognizes
// identifier-part characters with a single `<= CharacterNumber` comparison.
enum CharacterType {
CharacterIdentifierStart,
CharacterZero,
CharacterNumber,
CharacterInvalid,
CharacterLineTerminator,
CharacterExclamationMark,
CharacterOpenParen,
CharacterCloseParen,
CharacterOpenBracket,
CharacterCloseBracket,
CharacterComma,
CharacterColon,
CharacterQuestion,
CharacterTilde,
CharacterQuote,
CharacterBackQuote,
CharacterDot,
CharacterSlash,
CharacterBackSlash,
CharacterSemicolon,
CharacterOpenBrace,
CharacterCloseBrace,
CharacterAdd,
CharacterSub,
CharacterMultiply,
CharacterModulo,
CharacterAnd,
CharacterXor,
CharacterOr,
CharacterLess,
CharacterGreater,
CharacterEqual,
CharacterWhiteSpace,
CharacterPrivateIdentifierStart
};
static constexpr const unsigned short typesOfLatin1Characters[256] = {
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterWhiteSpace,
CharacterLineTerminator,
CharacterWhiteSpace,
CharacterWhiteSpace,
CharacterLineTerminator,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterWhiteSpace,
CharacterExclamationMark,
CharacterQuote,
CharacterInvalid,
CharacterIdentifierStart,
CharacterModulo,
CharacterAnd,
CharacterQuote,
CharacterOpenParen,
CharacterCloseParen,
CharacterMultiply,
CharacterAdd,
CharacterComma,
CharacterSub,
CharacterDot,
CharacterSlash,
CharacterZero,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterNumber,
CharacterColon,
CharacterSemicolon,
CharacterLess,
CharacterEqual,
CharacterGreater,
CharacterQuestion,
CharacterPrivateIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterOpenBracket,
CharacterBackSlash,
CharacterCloseBracket,
CharacterXor,
CharacterIdentifierStart,
CharacterBackQuote,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterOpenBrace,
CharacterOr,
CharacterCloseBrace,
CharacterTilde,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterWhiteSpace,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterIdentifierStart,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterIdentifierStart,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterIdentifierStart,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterInvalid,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterInvalid,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterInvalid,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart,
CharacterIdentifierStart
};
// Indexed by the ASCII character that follows a backslash; yields the character the
// escape stands for, or 0 when the character does not form a single-character escape.
// Most printable characters map to themselves. The zero entries are the control
// characters, the digits, 'u', 'x' and DEL; singleEscape() callers treat 0 as
// "not a simple escape" and handle those characters separately.
static constexpr const LChar singleCharacterEscapeValuesForASCII[128] = {
// 0x00-0x1F: control characters
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
// 0x20-0x2F: space and punctuation map to themselves
' ',
'!',
'"',
'#',
'$',
'%',
'&',
'\'',
'(',
')',
'*',
'+',
',',
'-',
'.',
'/',
// 0x30-0x39: digits '0'..'9'
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
// 0x3A-0x60: punctuation and capital letters map to themselves
':',
';',
'<',
'=',
'>',
'?',
'@',
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z',
'[',
'\\',
']',
'^',
'_',
'`',
// 0x61-0x7A: lowercase letters; the numeric entries are the control-character escapes
'a',
// 'b' -> backspace
0x08,
'c',
'd',
'e',
// 'f' -> form feed
0x0C,
'g',
'h',
'i',
'j',
'k',
'l',
'm',
// 'n' -> line feed
0x0A,
'o',
'p',
'q',
// 'r' -> carriage return
0x0D,
's',
// 't' -> horizontal tab
0x09,
// 'u' (starts a \u escape)
0,
// 'v' -> vertical tab
0x0B,
'w',
// 'x' (starts a \x escape)
0,
'y',
'z',
// 0x7B-0x7E: punctuation maps to itself
'{',
'|',
'}',
'~',
// 0x7F delete
0
};
// Constructs a lexer bound to |vm|.
// |builtinMode| decides whether m_parsingBuiltinFunction is set (true only for
// JSParserBuiltinMode::Builtin); |scriptMode| is stored unchanged.
// No source text is attached here; that happens in setCode().
template <typename T>
Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
: m_isReparsingFunction(false)
, m_vm(vm)
, m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
, m_scriptMode(scriptMode)
{
}
// Classifies the numeric value of an integer-looking literal.
// Returns INTEGER when |doubleValue| is integer-valued, lies in the int64_t range and
// is not negative zero; returns DOUBLE otherwise (including NaN and infinities).
static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
{
    // 2^63 is exactly representable as a double. Converting a double outside
    // [-2^63, 2^63) -- or NaN -- to int64_t is undefined behaviour, so rule those
    // values out before casting. NaN fails both comparisons and falls through here.
    constexpr double int64Limit = 9223372036854775808.0;
    if (!(doubleValue >= -int64Limit && doubleValue < int64Limit))
        return DOUBLE;

    // Negative zero must stay a double so that its sign is preserved.
    if (!doubleValue && std::signbit(doubleValue))
        return DOUBLE;

    if (static_cast<int64_t>(doubleValue) == doubleValue)
        return INTEGER;
    return DOUBLE;
}
// Nothing to release explicitly; members clean up after themselves.
template <typename T>
Lexer<T>::~Lexer() = default;
// Builds the error message describing the current (invalid) character.
// A handful of characters get a readable spelling; everything else is reported
// as a \uXXXX escape.
template <typename T>
String Lexer<T>::invalidCharacterMessage() const
{
    switch (m_current) {
    case '\0':
        return "Invalid character: '\\0'"_s;
    case '\n':
        return "Invalid character: '\\n'"_s;
    case '\v':
        return "Invalid character: '\\v'"_s;
    case '\r':
        return "Invalid character: '\\r'"_s;
    case '#':
        return "Invalid character: '#'"_s;
    case '@':
        return "Invalid character: '@'"_s;
    case '`':
        return "Invalid character: '`'"_s;
    default:
        break;
    }

    const unsigned codeUnit = static_cast<unsigned>(m_current);
    return String::format("Invalid character '\\u%04x'", codeUnit);
}
// Returns the pointer to the character the lexer is currently positioned on.
// Asserts that the cursor has not moved past m_codeEnd.
template <typename T>
ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
{
ASSERT(m_code <= m_codeEnd);
return m_code;
}
// Attaches |source| to the lexer and resets all per-source state: cursor pointers,
// line tracking, error state, directives and scratch buffers. Identifiers created
// while lexing are allocated from |arena|'s identifier arena.
template <typename T>
void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
{
    m_arena = &arena->identifierArena();

    m_lineNumber = source.firstLine().oneBasedInt();
    m_lastToken = -1;

    // A provider without source text leaves the lexer with a null code start.
    StringView sourceString = source.provider()->source();
    if (sourceString.isNull())
        m_codeStart = nullptr;
    else
        setCodeStart(sourceString);

    m_source = &source;
    m_sourceOffset = source.startOffset();

    // Cursor and bounds within the provider's text.
    m_codeStartPlusOffset = m_codeStart + source.startOffset();
    m_code = m_codeStartPlusOffset;
    m_codeEnd = m_codeStart + source.endOffset();
    m_lineStart = m_code;
    m_atLineStart = true;

    // Error state and directives from any previous source are discarded.
    m_error = false;
    m_lexErrorMessage = String();
    m_sourceURLDirective = String();
    m_sourceMappingURLDirective = String();

    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
    m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);

    // An empty range is represented by a zero current character.
    m_current = LIKELY(m_code < m_codeEnd) ? *m_code : 0;

    ASSERT(currentOffset() == source.startOffset());
}
// Moves the cursor by the compile-time constant |shiftAmount| and reloads m_current.
// Unlike shift(), the new position is dereferenced without comparing it to m_codeEnd.
// NOTE(review): presumably callers guarantee the target position is readable -- confirm.
template <typename T>
template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
{
m_code += shiftAmount;
ASSERT(currentOffset() >= currentLineStartOffset());
m_current = *m_code;
}
// Advances the cursor by one character. m_current becomes the character at the new
// position, or 0 once the cursor has reached m_codeEnd.
template <typename T>
ALWAYS_INLINE void Lexer<T>::shift()
{
    ++m_code;
    m_current = LIKELY(m_code < m_codeEnd) ? *m_code : 0;
}
// True when the cursor sits exactly at m_codeEnd. A zero m_current alone is not
// sufficient: the source text may contain a literal NUL character.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::atEnd() const
{
    ASSERT(!m_current || m_code < m_codeEnd);
    if (LIKELY(m_current))
        return false;
    return UNLIKELY(m_code == m_codeEnd);
}
// Looks |offset| characters ahead of the cursor without moving it.
// |offset| must be in [1, 4]. Returns 0 when the position is at or past m_codeEnd.
template <typename T>
ALWAYS_INLINE T Lexer<T>::peek(int offset) const
{
    ASSERT(offset > 0 && offset < 5);
    const T* lookahead = m_code + offset;
    if (lookahead >= m_codeEnd)
        return 0;
    return *lookahead;
}
// Result of parsing a \u escape: either a code point (non-negative) or one of two
// negative sentinels saying why no code point was produced. Both constructors are
// intentionally implicit so a code point or a sentinel can be returned directly.
struct ParsedUnicodeEscapeValue {
    enum SpecialValueType { Incomplete = -2, Invalid = -1 };

    ParsedUnicodeEscapeValue(UChar32 value)
        : m_value(value)
    {
        ASSERT(isValid());
    }

    ParsedUnicodeEscapeValue(SpecialValueType type)
        : m_value(type)
    {
    }

    // Sentinels are negative, real code points are not.
    bool isValid() const { return !(m_value < 0); }
    bool isIncomplete() const { return Incomplete == m_value; }

    // Only meaningful for a valid result.
    UChar32 value() const
    {
        ASSERT(isValid());
        return m_value;
    }

private:
    UChar32 m_value;
};
// Parses the body of a \u escape; the callers in this file have already shifted past
// the "\u". Two forms are accepted:
//   {H...}  one or more hex digits in braces, value at most UCHAR_MAX_VALUE
//   HHHH    exactly four hex digits
// Returns the code point on success. On failure returns Incomplete when the input
// appears to have run out and Invalid otherwise; the cursor is left after any hex
// digits that were skipped.
template<typename CharacterType>
ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
{
if (m_current == '{') {
shift();
UChar32 codePoint = 0;
// do/while: at least one hex digit is required, so "\u{}" is rejected.
do {
if (!isASCIIHexDigit(m_current))
// A zero m_current is taken to mean the input ended inside the braces.
return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
if (codePoint > UCHAR_MAX_VALUE) {
// Value out of range: skip the remaining hex digits before reporting.
shift();
while (isASCIIHexDigit(m_current))
shift();
return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
}
shift();
} while (m_current != '}');
// Consume the closing brace.
shift();
return codePoint;
}
// Four-digit form: inspect all four characters before consuming any of them.
auto character2 = peek(1);
auto character3 = peek(2);
auto character4 = peek(3);
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) {
// Fewer than four characters plus one more remaining counts as incomplete input.
auto result = (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
while (isASCIIHexDigit(m_current))
shift();
return result;
}
auto result = convertUnicode(m_current, character2, character3, character4);
shift();
shift();
shift();
shift();
return result;
}
// Consumes the line terminator under the cursor and bumps the line counter.
// The position before the terminator is remembered in m_positionBeforeLastNewline.
template <typename T>
void Lexer<T>::shiftLineTerminator()
{
    ASSERT(isLineTerminator(m_current));

    m_positionBeforeLastNewline = currentPosition();

    const bool startedWithCarriageReturn = m_current == '\r';
    shift();

    // "\r\n" counts as a single line terminator.
    if (startedWithCarriageReturn && m_current == '\n')
        shift();

    ++m_lineNumber;
}
// True when the previously lexed token was continue, break, return or throw.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
{
    switch (m_lastToken) {
    case CONTINUE:
    case BREAK:
    case RETURN:
    case THROW:
        return true;
    default:
        return false;
    }
}
// Advances the cursor past a run of whitespace characters (as defined by isWhiteSpace).
template <typename T>
ALWAYS_INLINE void Lexer<T>::skipWhitespace()
{
    for (; isWhiteSpace(m_current); shift()) { }
}
// Slow path of isIdentStart() for code points outside Latin-1: true when the code
// point's Unicode general category is a letter category (U_GC_L_MASK).
// The parameter is a full UChar32. isIdentStart(UChar32) passes code points that may
// lie above U+FFFF (for example from a \u{...} escape); a 16-bit parameter would
// truncate them and classify an unrelated BMP character instead.
static NEVER_INLINE bool isNonLatin1IdentStart(UChar32 c)
{
    return U_GET_GC_MASK(c) & U_GC_L_MASK;
}
// isLatin1 overloads: does the character fit in the Latin-1 range 0x00-0xFF?

// An 8-bit character is Latin-1 by construction.
static ALWAYS_INLINE bool isLatin1(LChar)
{
    return true;
}

static ALWAYS_INLINE bool isLatin1(UChar c)
{
    return c <= 0xFF;
}

// Any bit set above the low byte (including the sign bit) rules the value out.
static ALWAYS_INLINE bool isLatin1(UChar32 c)
{
    return (c & ~0xFF) == 0;
}
// Latin-1 characters are classified with the lookup table.
static inline bool isIdentStart(LChar c)
{
    return CharacterIdentifierStart == typesOfLatin1Characters[c];
}

// Code points: table lookup for Latin-1, out-of-line Unicode category check otherwise.
static inline bool isIdentStart(UChar32 c)
{
    if (!isLatin1(c))
        return isNonLatin1IdentStart(c);
    return isIdentStart(static_cast<LChar>(c));
}
// Slow path of isIdentPart() for code points outside Latin-1. Accepts U+200C and
// U+200D, plus any code point whose general category is letter, non-spacing mark,
// spacing combining mark, decimal digit or connector punctuation.
static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
{
    if (c == 0x200C || c == 0x200D)
        return true;

    const auto identPartCategories = U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK;
    return (U_GET_GC_MASK(c) & identPartCategories) != 0;
}
// Latin-1: identifier-part classes (identifier start, zero, other digits) are the
// enumerators up to and including CharacterNumber, so one comparison suffices.
static ALWAYS_INLINE bool isIdentPart(LChar c)
{
    return !(typesOfLatin1Characters[c] > CharacterNumber);
}

// Code points: table lookup for Latin-1, out-of-line Unicode category check otherwise.
static ALWAYS_INLINE bool isIdentPart(UChar32 c)
{
    if (isLatin1(c))
        return isIdentPart(static_cast<LChar>(c));
    return isNonLatin1IdentPart(c);
}

// 16-bit code units are widened and handled by the code point overload.
static ALWAYS_INLINE bool isIdentPart(UChar c)
{
    const UChar32 codePoint = c;
    return isIdentPart(codePoint);
}
// Reports whether the text at |code| (bounded by |codeEnd|) begins with something that
// can continue an identifier: either a plain identifier-part character, or a complete
// \uHHHH / \u{H...} escape whose value is an identifier-part code point.
// Nothing is consumed; this is a pure lookahead.
template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
{
    if (isIdentPart(code[0]))
        return true;

    // From here on only an escape can qualify; the shortest one spans five characters.
    if (code[0] != '\\' || codeEnd - code < 5 || code[1] != 'u')
        return false;

    if (code[2] == '{') {
        // Braced form: accumulate hex digits until something else shows up.
        UChar32 codePoint = 0;
        const CharacterType* cursor = code + 3;
        while (cursor < codeEnd && isASCIIHexDigit(*cursor)) {
            codePoint = (codePoint << 4) | toASCIIHexValue(*cursor);
            if (codePoint > UCHAR_MAX_VALUE)
                return false;
            ++cursor;
        }
        return isIdentPart(codePoint) && cursor < codeEnd && *cursor == '}';
    }

    // Four-digit form needs "\u" plus four hex digits.
    if (codeEnd - code < 6)
        return false;
    for (int index = 2; index <= 5; ++index) {
        if (!isASCIIHexDigit(code[index]))
            return false;
    }
    return isIdentPart(Lexer<LChar>::convertUnicode(code[2], code[3], code[4], code[5]));
}
// Non-template entry points for the identifier-part lookahead, one per character
// width; both simply forward to isIdentPartIncludingEscapeTemplate.
static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
{
return isIdentPartIncludingEscapeTemplate(code, codeEnd);
}
static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
{
return isIdentPartIncludingEscapeTemplate(code, codeEnd);
}
// Returns the character that the escape sequence backslash + |c| stands for, or 0 when
// |c| does not form a single-character escape (including any |c| outside ASCII).
// The index is bounded on both sides so that a negative |c| can never read outside
// the table, and the upper bound follows the table's actual length.
static inline LChar singleEscape(int c)
{
    constexpr int tableLength = static_cast<int>(WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII));
    if (c < 0 || c >= tableLength)
        return 0;
    return singleCharacterEscapeValuesForASCII[c];
}
// Appends one character to the 8-bit scratch buffer. |c| must fit in 8 bits.
template <typename T>
inline void Lexer<T>::record8(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= 0xFF);
    const LChar narrowed = static_cast<LChar>(c);
    m_buffer8.append(narrowed);
}
// Debug-only check that a character fits in 8 bits.
// Generic version: checks both bounds.
template <typename T>
inline void assertCharIsIn8BitRange(T c)
{
UNUSED_PARAM(c);
ASSERT(c >= 0);
ASSERT(c <= 0xFF);
}
// UChar: only the upper bound is checked.
template <>
inline void assertCharIsIn8BitRange(UChar c)
{
UNUSED_PARAM(c);
ASSERT(c <= 0xFF);
}
// LChar: nothing to check.
template <>
inline void assertCharIsIn8BitRange(LChar)
{
}
// Appends |length| source characters starting at |p| to the 8-bit scratch buffer.
// Every character is expected to fit in 8 bits (asserted in debug builds).
template <typename T>
inline void Lexer<T>::append8(const T* p, size_t length)
{
    const size_t oldSize = m_buffer8.size();
    m_buffer8.grow(oldSize + length);

    LChar* destination = m_buffer8.data() + oldSize;
    const T* const sourceEnd = p + length;
    for (const T* source = p; source != sourceEnd; ++source, ++destination) {
        T character = *source;
        assertCharIsIn8BitRange(character);
        *destination = character;
    }
}
// Appends |length| 8-bit characters starting at |p| to the 16-bit scratch buffer,
// widening each one.
template <typename T>
inline void Lexer<T>::append16(const LChar* p, size_t length)
{
    const size_t oldSize = m_buffer16.size();
    m_buffer16.grow(oldSize + length);

    UChar* destination = m_buffer16.data() + oldSize;
    const LChar* const sourceEnd = p + length;
    for (const LChar* source = p; source != sourceEnd; ++source, ++destination)
        *destination = *source;
}
// Appends one source character to the 16-bit scratch buffer.
template <typename T>
inline void Lexer<T>::record16(T c)
{
m_buffer16.append(c);
}
// Appends one UTF-16 code unit, given as an int, to the 16-bit scratch buffer.
// |c| must lie in [0, USHRT_MAX].
template <typename T>
inline void Lexer<T>::record16(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= static_cast<int>(USHRT_MAX));
    const UChar codeUnit = static_cast<UChar>(c);
    m_buffer16.append(codeUnit);
}
// Appends |codePoint| to the 16-bit scratch buffer: one code unit for a BMP code
// point, a lead/trail surrogate pair otherwise.
template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
{
    ASSERT(codePoint >= 0);
    ASSERT(codePoint <= UCHAR_MAX_VALUE);

    if (!U_IS_BMP(codePoint)) {
        UChar surrogatePair[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
        append16(surrogatePair, 2);
        return;
    }

    record16(codePoint);
}
#if !ASSERT_DISABLED
// Returns false for the identifiers that builtin code must not use by their public
// names: call, apply, eval and Function. A null |ident| is considered safe.
bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
{
    if (!ident)
        return true;

    const bool isDisallowed = *ident == vm.propertyNames->builtinNames().callPublicName()
        || *ident == vm.propertyNames->builtinNames().applyPublicName()
        || *ident == vm.propertyNames->eval
        || *ident == vm.propertyNames->Function;
    return !isDisallowed;
}
#endif
// Fast-path identifier/keyword lexer for 8-bit source.
// Stores the identifier (or nullptr when none is built) in tokenData->ident and returns
// the token type: a keyword token, IDENT, or an error token. Identifiers containing a
// backslash are handed to parseIdentifierSlowCase after rewinding to the start.
// |strictMode| controls whether RESERVED_IF_STRICT keywords are reported as keywords
// or as plain identifiers.
template <>
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
tokenData->escaped = false;
const ptrdiff_t remaining = m_codeEnd - m_code;
// parseKeyword is only attempted when at least maxTokenLength characters remain;
// shorter tails are matched against the keyword table after scanning (below).
if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
if (keyword != IDENT) {
ASSERT((!shouldCreateIdentifier) || tokenData->ident);
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
}
}
// In builtin mode a leading '@' marks a private name; the '@' itself is skipped.
bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
if (isPrivateName)
shift();
const LChar* identifierStart = currentSourcePtr();
unsigned identifierLineStart = currentLineStartOffset();
while (isIdentPart(m_current))
shift();
// A backslash means the identifier contains an escape: rewind and take the slow path.
if (UNLIKELY(m_current == '\\')) {
setOffsetFromSourcePtr(identifierStart, identifierLineStart);
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
}
const Identifier* ident = nullptr;
// Builtin mode always builds the identifier because it is validated below.
if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
int identifierLength = currentSourcePtr() - identifierStart;
ident = makeIdentifier(identifierStart, identifierLength);
if (m_parsingBuiltinFunction) {
if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
return ERRORTOK;
}
if (isPrivateName)
ident = m_vm->propertyNames->lookUpPrivateName(*ident);
else if (*ident == m_vm->propertyNames->undefinedKeyword)
// NOTE(review): this store is overwritten by `tokenData->ident = ident` a few
// lines below, so as written it has no lasting effect -- confirm the intent.
tokenData->ident = &m_vm->propertyNames->builtinNames().undefinedPrivateName();
// A private name that could not be looked up is an error.
if (!ident)
return INVALID_PRIVATE_NAME_ERRORTOK;
}
tokenData->ident = ident;
} else
tokenData->ident = nullptr;
// Near the end of the input parseKeyword was skipped above, so keywords are
// recognized here by looking the finished identifier up in the keyword table.
if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
ASSERT(shouldCreateIdentifier);
if (remaining < maxTokenLength) {
const HashTableValue* entry = JSC::mainTable.entry(*ident);
ASSERT((remaining < maxTokenLength) || !entry);
if (!entry)
return IDENT;
JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
}
return IDENT;
}
return IDENT;
}
// Fast-path identifier/keyword lexer for 16-bit source.
// Same contract as the 8-bit specialization: stores the identifier (or nullptr) in
// tokenData->ident and returns a keyword token, IDENT, or an error token. In addition
// it tracks whether every scanned character fits in 8 bits so that such identifiers
// can be created through makeIdentifierLCharFromUChar.
template <>
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
tokenData->escaped = false;
const ptrdiff_t remaining = m_codeEnd - m_code;
// parseKeyword is only attempted when at least maxTokenLength characters remain;
// shorter tails are matched against the keyword table after scanning (below).
if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
if (keyword != IDENT) {
ASSERT((!shouldCreateIdentifier) || tokenData->ident);
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
}
}
// In builtin mode a leading '@' marks a private name; the '@' itself is skipped.
bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
if (isPrivateName)
shift();
const UChar* identifierStart = currentSourcePtr();
int identifierLineStart = currentLineStartOffset();
// OR of all scanned code units; any bit above 0xFF means a non-8-bit character was seen.
UChar orAllChars = 0;
while (isIdentPart(m_current)) {
orAllChars |= m_current;
shift();
}
// A backslash means the identifier contains an escape: rewind and take the slow path.
if (UNLIKELY(m_current == '\\')) {
ASSERT(!isPrivateName);
setOffsetFromSourcePtr(identifierStart, identifierLineStart);
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
}
bool isAll8Bit = false;
if (!(orAllChars & ~0xff))
isAll8Bit = true;
const Identifier* ident = nullptr;
// Builtin mode always builds the identifier because it is validated below.
if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
int identifierLength = currentSourcePtr() - identifierStart;
if (isAll8Bit)
ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
else
ident = makeIdentifier(identifierStart, identifierLength);
if (m_parsingBuiltinFunction) {
if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
return ERRORTOK;
}
if (isPrivateName)
ident = m_vm->propertyNames->lookUpPrivateName(*ident);
else if (*ident == m_vm->propertyNames->undefinedKeyword)
// NOTE(review): this store is overwritten by `tokenData->ident = ident` a few
// lines below, so as written it has no lasting effect -- confirm the intent.
tokenData->ident = &m_vm->propertyNames->builtinNames().undefinedPrivateName();
// A private name that could not be looked up is an error.
if (!ident)
return INVALID_PRIVATE_NAME_ERRORTOK;
}
tokenData->ident = ident;
} else
tokenData->ident = nullptr;
// Near the end of the input parseKeyword was skipped above, so keywords are
// recognized here by looking the finished identifier up in the keyword table.
if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
ASSERT(shouldCreateIdentifier);
if (remaining < maxTokenLength) {
const HashTableValue* entry = JSC::mainTable.entry(*ident);
ASSERT((remaining < maxTokenLength) || !entry);
if (!entry)
return IDENT;
JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
}
return IDENT;
}
return IDENT;
}
// Slow path for identifiers that contain \u escapes. The cursor must be at the first
// character of the identifier. Raw runs and decoded escapes are collected in
// m_buffer16; the result is stored in tokenData->ident (nullptr when
// shouldCreateIdentifier is false) and tokenData->escaped is set.
// Returns IDENT, a keyword token, or an error token:
//   - UNTERMINATED_/INVALID_IDENTIFIER_ESCAPE_ERRORTOK when the backslash is not
//     followed by 'u';
//   - UNTERMINATED_/INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK for a malformed escape or
//     one whose code point is not allowed at that position;
//   - UNEXPECTED_ESCAPE_ERRORTOK when an escaped spelling produces a keyword.
template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
{
    tokenData->escaped = true;
    auto identifierStart = currentSourcePtr();
    bool bufferRequired = false;

    while (true) {
        if (LIKELY(isIdentPart(m_current))) {
            shift();
            continue;
        }
        if (LIKELY(m_current != '\\'))
            break;

        // The escape is the identifier's first character only when no earlier escape was
        // seen and no raw characters precede it. This must not be derived from
        // m_buffer16.size(): decoded escapes are only recorded when shouldCreateIdentifier
        // is true, so the buffer can be empty even though characters were consumed.
        const bool escapeStartsIdentifier = !bufferRequired && identifierStart == currentSourcePtr();

        // \uXXXX unicode characters.
        bufferRequired = true;
        if (identifierStart != currentSourcePtr())
            m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
        shift();
        if (UNLIKELY(m_current != 'u'))
            return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
        shift();
        auto character = parseUnicodeEscape();
        if (UNLIKELY(!character.isValid()))
            return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
        if (UNLIKELY(escapeStartsIdentifier ? !isIdentStart(character.value()) : !isIdentPart(character.value())))
            return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
        if (shouldCreateIdentifier)
            recordUnicodeCodePoint(character.value());
        // The next raw run starts right after the escape.
        identifierStart = currentSourcePtr();
    }

    const Identifier* ident = nullptr;
    if (shouldCreateIdentifier) {
        if (!bufferRequired) {
            int identifierLength = currentSourcePtr() - identifierStart;
            ident = makeIdentifier(identifierStart, identifierLength);
        } else {
            // Flush the raw run that follows the last escape.
            if (identifierStart != currentSourcePtr())
                m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
            ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
        }
        tokenData->ident = ident;
    } else
        tokenData->ident = nullptr;

    m_buffer16.shrink(0);

    if (LIKELY(!(lexerFlags & LexerFlagsIgnoreReservedWords))) {
        ASSERT(shouldCreateIdentifier);
        const HashTableValue* entry = JSC::mainTable.entry(*ident);
        if (!entry)
            return IDENT;
        JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
        // A keyword spelled with an escape is reported as an error, not as the keyword.
        if ((token != RESERVED_IF_STRICT) || strictMode)
            return bufferRequired ? UNEXPECTED_ESCAPE_ERRORTOK : token;
    }

    return IDENT;
}
// Decides whether parseString's 8-bit fast path has to give up on |character|.

// 8-bit source: only the low control range (below 0xE) needs the slow path.
static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
{
    return character <= 0xD;
}

// 16-bit source: additionally, anything that does not fit in 8 bits.
static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
{
    return character > 0xFF || character <= 0xD;
}
// Fast path for string literals whose contents fit in 8 bits. The cursor must be on
// the opening quote; on success it is left on the closing quote and the string (or 0
// when shouldBuildStrings is false) is stored in tokenData->ident.
// Handled inline: single-character escapes, escaped line terminators and \xHH.
// Anything else (other escapes, characters below 0xE, and for 16-bit source characters
// above 0xFF) rewinds to the opening quote and restarts in parseStringSlowCase.
template <typename T>
template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
{
// Saved so the slow path can restart from the opening quote.
int startingOffset = currentOffset();
int startingLineStartOffset = currentLineStartOffset();
int startingLineNumber = lineNumber();
T stringQuoteCharacter = m_current;
shift();
// Start of the current run of unescaped characters not yet copied to m_buffer8.
const T* stringStart = currentSourcePtr();
while (m_current != stringQuoteCharacter) {
if (UNLIKELY(m_current == '\\')) {
// Flush the pending run before handling the escape.
if (stringStart != currentSourcePtr() && shouldBuildStrings)
append8(stringStart, currentSourcePtr() - stringStart);
shift();
LChar escape = singleEscape(m_current);
if (escape) {
if (shouldBuildStrings)
record8(escape);
shift();
} else if (UNLIKELY(isLineTerminator(m_current)))
// Backslash + line terminator contributes nothing to the string.
shiftLineTerminator();
else if (m_current == 'x') {
shift();
if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
}
T prev = m_current;
shift();
if (shouldBuildStrings)
record8(convertHex(prev, m_current));
shift();
} else {
// Any other escape: rewind and let the slow path handle the whole literal.
setOffset(startingOffset, startingLineStartOffset);
setLineNumber(startingLineNumber);
m_buffer8.shrink(0);
return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
}
stringStart = currentSourcePtr();
continue;
}
// End of input also lands here: shift() leaves m_current at 0 there, which is below 0xE.
if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
setOffset(startingOffset, startingLineStartOffset);
setLineNumber(startingLineNumber);
m_buffer8.shrink(0);
return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
}
shift();
}
// Flush the final run of unescaped characters.
if (currentSourcePtr() != stringStart && shouldBuildStrings)
append8(stringStart, currentSourcePtr() - stringStart);
if (shouldBuildStrings) {
tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
m_buffer8.shrink(0);
} else
tokenData->ident = 0;
return StringParsedSuccessfully;
}
// Parses the escapes that are not simple single-character escapes; the cursor is on
// the character following the backslash. Decoded characters go to m_buffer16 when
// shouldBuildStrings is true.
// Handles \xHH, \u escapes, numeric escapes, and finally any other character, which
// is taken literally. In strict mode the only numeric escape allowed is \0 not
// followed by a digit; otherwise octal escapes of up to three digits are accepted.
// On failure m_lexErrorMessage is set and StringUnterminated (input ended) or
// StringCannotBeParsed is returned.
template <typename T>
template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode, T stringQuoteCharacter) -> StringParseResult
{
if (m_current == 'x') {
shift();
if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
// Step over a lone valid first digit so the cursor rests on the offending character.
if (isASCIIHexDigit(m_current))
shift();
ASSERT(!isASCIIHexDigit(m_current));
m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
return atEnd() ? StringUnterminated : StringCannotBeParsed;
}
T prev = m_current;
shift();
if (shouldBuildStrings)
record16(convertHex(prev, m_current));
shift();
return StringParsedSuccessfully;
}
if (m_current == 'u') {
shift();
// In string mode, "\u" directly followed by the closing quote yields a literal 'u'.
if (escapeParseMode == LexerEscapeParseMode::String && m_current == stringQuoteCharacter) {
if (shouldBuildStrings)
record16('u');
return StringParsedSuccessfully;
}
auto character = parseUnicodeEscape();
if (character.isValid()) {
if (shouldBuildStrings)
recordUnicodeCodePoint(character.value());
return StringParsedSuccessfully;
}
m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
return atEnd() ? StringUnterminated : StringCannotBeParsed;
}
if (strictMode) {
if (isASCIIDigit(m_current)) {
int character1 = m_current;
shift();
// Anything other than a lone '0' is rejected.
if (character1 != '0' || isASCIIDigit(m_current)) {
if (character1 == '0')
shift();
m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
return atEnd() ? StringUnterminated : StringCannotBeParsed;
}
if (shouldBuildStrings)
record16(0);
return StringParsedSuccessfully;
}
} else {
if (isASCIIOctalDigit(m_current)) {
// Octal escape of one to three digits; a third digit is only taken when the
// first digit is 0-3.
T character1 = m_current;
shift();
if (isASCIIOctalDigit(m_current)) {
T character2 = m_current;
shift();
if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
if (shouldBuildStrings)
record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
shift();
} else {
if (shouldBuildStrings)
record16((character1 - '0') * 8 + character2 - '0');
}
} else {
if (shouldBuildStrings)
record16(character1 - '0');
}
return StringParsedSuccessfully;
}
}
// Any other escaped character stands for itself.
if (!atEnd()) {
if (shouldBuildStrings)
record16(m_current);
shift();
return StringParsedSuccessfully;
}
m_lexErrorMessage = "Unterminated string constant"_s;
return StringUnterminated;
}
// General string-literal parser that builds the result in the 16-bit buffer. The
// cursor must be on the opening quote; on success it is left on the closing quote and
// the string (or 0 when shouldBuildStrings is false) is stored in tokenData->ident.
// Fails with StringUnterminated at end of input and with StringCannotBeParsed on an
// unescaped '\r' or '\n', or when an escape cannot be parsed.
template <typename T>
template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
{
T stringQuoteCharacter = m_current;
shift();
// Start of the current run of unescaped characters not yet copied to m_buffer16.
const T* stringStart = currentSourcePtr();
while (m_current != stringQuoteCharacter) {
if (UNLIKELY(m_current == '\\')) {
// Flush the pending run before handling the escape.
if (stringStart != currentSourcePtr() && shouldBuildStrings)
append16(stringStart, currentSourcePtr() - stringStart);
shift();
LChar escape = singleEscape(m_current);
if (escape) {
if (shouldBuildStrings)
record16(escape);
shift();
} else if (UNLIKELY(isLineTerminator(m_current)))
// Backslash + line terminator contributes nothing to the string.
shiftLineTerminator();
else {
StringParseResult result = parseComplexEscape<shouldBuildStrings, LexerEscapeParseMode::String>(strictMode, stringQuoteCharacter);
if (result != StringParsedSuccessfully)
return result;
}
stringStart = currentSourcePtr();
continue;
}
static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
// Cheap range test first: end of input (m_current == 0), '\r' and '\n' are all below 0xE.
if (UNLIKELY(m_current < 0xE)) {
if (atEnd() || m_current == '\r' || m_current == '\n') {
m_lexErrorMessage = "Unexpected EOF"_s;
return atEnd() ? StringUnterminated : StringCannotBeParsed;
}
}
shift();
}
// Flush the final run of unescaped characters.
if (currentSourcePtr() != stringStart && shouldBuildStrings)
append16(stringStart, currentSourcePtr() - stringStart);
if (shouldBuildStrings)
tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
else
tokenData->ident = 0;
m_buffer16.shrink(0);
return StringParsedSuccessfully;
}
// Parses one template-literal chunk, starting at the cursor and ending at a closing
// backquote or at "${". Produces:
//   tokenData->cooked  the chunk with escapes decoded (built in m_buffer16), or nullptr
//                      when raw strings are being built and an escape could not be parsed;
//   tokenData->raw     the chunk as written (built in m_bufferForRawTemplateString16),
//                      or nullptr unless |rawStringsBuildMode| is BuildRawStrings;
//   tokenData->isTail  true when the chunk ended at the closing backquote.
// A '\r' line terminator (alone or followed by '\n') is recorded as a single '\n' in
// both strings. The terminating backquote or "${" is consumed before returning.
template <typename T>
typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
{
bool parseCookedFailed = false;
// Starts of the pending, not yet copied runs for the cooked and the raw string.
const T* stringStart = currentSourcePtr();
const T* rawStringStart = currentSourcePtr();
while (m_current != '`') {
if (UNLIKELY(m_current == '\\')) {
if (stringStart != currentSourcePtr())
append16(stringStart, currentSourcePtr() - stringStart);
shift();
LChar escape = singleEscape(m_current);
if (escape) {
record16(escape);
shift();
} else if (UNLIKELY(isLineTerminator(m_current))) {
// Escaped line terminator: nothing is added to the cooked string. For '\r' the
// raw string is flushed up to here and a '\n' is recorded in its place.
if (m_current == '\r') {
ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
m_bufferForRawTemplateString16.append('\n');
}
shiftLineTerminator();
rawStringStart = currentSourcePtr();
} else
shiftLineTerminator();
} else {
// Escapes in templates are always parsed with strict-mode rules.
bool strictMode = true;
StringParseResult result = parseComplexEscape<true, LexerEscapeParseMode::Template>(strictMode, '`');
if (result != StringParsedSuccessfully) {
// When raw strings are built, an unparsable escape only invalidates the cooked
// string; lexing continues so the raw string can still be produced.
if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
parseCookedFailed = true;
else
return result;
}
}
stringStart = currentSourcePtr();
continue;
}
// "${" starts a substitution and ends this chunk.
if (m_current == '$' && peek(1) == '{')
break;
// Cheap pre-filter: the bit test passes for values below 0xE (the unsigned
// subtraction wraps around) and for any value whose (c - 0xE) has bit 0x2000 set,
// which includes U+2028 and U+2029. The exact checks follow inside.
if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
if (atEnd()) {
m_lexErrorMessage = "Unexpected EOF"_s;
return StringUnterminated;
}
if (isLineTerminator(m_current)) {
if (m_current == '\r') {
// Flush both pending runs, then record '\n' in place of the '\r' terminator.
if (stringStart != currentSourcePtr())
append16(stringStart, currentSourcePtr() - stringStart);
if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
record16('\n');
if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
m_bufferForRawTemplateString16.append('\n');
shiftLineTerminator();
stringStart = currentSourcePtr();
rawStringStart = currentSourcePtr();
} else
shiftLineTerminator();
continue;
}
}
shift();
}
bool isTail = m_current == '`';
// Flush whatever is still pending for the cooked and the raw string.
if (currentSourcePtr() != stringStart)
append16(stringStart, currentSourcePtr() - stringStart);
if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
if (!parseCookedFailed)
tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
else
tokenData->cooked = nullptr;
if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
else
tokenData->raw = nullptr;
tokenData->isTail = isTail;
m_buffer16.shrink(0);
m_bufferForRawTemplateString16.shrink(0);
if (isTail) {
// Consume the closing backquote.
shift();
} else {
// Consume "${".
shift();
shift();
}
return StringParsedSuccessfully;
}
// Parses the digits of a hexadecimal literal. On entry m_current is the first
// hex digit (the do/while consumes it unconditionally).
// Returns either the numeric value or, when BigInt is enabled and the digits
// are followed by 'n', an identifier holding the digit characters.
template <typename T>
ALWAYS_INLINE auto Lexer<T>::parseHex() -> NumberParseResult
{
    // Fast path: accumulate the digits directly in a 32-bit integer.
    uint32_t accumulated = 0;
    int remainingDigits = 7;

    do {
        accumulated = (accumulated << 4) + toASCIIHexValue(m_current);
        shift();
        --remainingDigits;
    } while (isASCIIHexDigit(m_current) && remainingDigits >= 0);

    // The integer is returned directly only if fewer than eight digits were
    // read and the literal does not carry an 'n' suffix.
    const bool fastPathSufficed = remainingDigits >= 0 && m_current != 'n';
    if (LIKELY(fastPathSufficed))
        return accumulated;

    // Slow path: write the accumulator out as eight hex characters, most
    // significant nibble first, so the text can be re-parsed from m_buffer8.
    for (int bitOffset = 28; bitOffset >= 0; bitOffset -= 4) {
        int nibble = (accumulated >> bitOffset) & 0xF;
        record8(nibble < 10 ? nibble + '0' : nibble - 10 + 'a');
    }

    // Append whatever hex digits are still pending in the input.
    for (; isASCIIHexDigit(m_current); shift())
        record8(m_current);

    if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
        return makeIdentifier(m_buffer8.data(), m_buffer8.size());

    return parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
}
// Parses the digits of a binary literal. On entry m_current is the first digit.
// Returns the value, a BigInt digit identifier (when BigInt is enabled and an
// 'n' suffix follows), or std::nullopt when a decimal digit that is not a
// binary digit directly follows the literal.
template <typename T>
ALWAYS_INLINE auto Lexer<T>::parseBinary() -> std::optional<NumberParseResult>
{
    const unsigned maximumDigits = 32;

    // Fast path: accumulate in a 32-bit integer while remembering the raw
    // characters (filled from the back) in case they must be replayed.
    uint32_t value = 0;
    LChar seenDigits[maximumDigits];
    int slot = maximumDigits - 1;

    do {
        seenDigits[slot] = m_current;
        value = (value << 1) + (m_current - '0');
        shift();
        --slot;
    } while (isASCIIBinaryDigit(m_current) && slot >= 0);

    const bool needsSlowPath = isASCIIDigit(m_current) || slot < 0 || m_current == 'n';
    if (LIKELY(!needsSlowPath))
        return Variant<double, const Identifier*> { value };

    // Slow path: replay the remembered digits in input order into m_buffer8 ...
    int replay = maximumDigits - 1;
    while (replay > slot) {
        record8(seenDigits[replay]);
        --replay;
    }

    // ... followed by any binary digits that are still pending.
    for (; isASCIIBinaryDigit(m_current); shift())
        record8(m_current);

    if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
        return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };

    if (isASCIIDigit(m_current))
        return std::nullopt;

    return Variant<double, const Identifier*> { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
}
// Parses the digits of an octal literal. On entry m_current is the first digit.
// Returns the value, a BigInt digit identifier (when BigInt is enabled and an
// 'n' suffix follows), or std::nullopt when an '8' or '9' directly follows the
// octal digits. Any characters already in m_buffer8 are kept and extended.
template <typename T>
ALWAYS_INLINE auto Lexer<T>::parseOctal() -> std::optional<NumberParseResult>
{
    const unsigned maximumDigits = 10;

    // Fast path: accumulate in a 32-bit integer while remembering the raw
    // characters (filled from the back) in case they must be replayed.
    uint32_t value = 0;
    LChar seenDigits[maximumDigits];
    int slot = maximumDigits - 1;

    do {
        seenDigits[slot] = m_current;
        value = value * 8 + (m_current - '0');
        shift();
        --slot;
    } while (isASCIIOctalDigit(m_current) && slot >= 0);

    const bool needsSlowPath = isASCIIDigit(m_current) || slot < 0 || m_current == 'n';
    if (LIKELY(!needsSlowPath))
        return Variant<double, const Identifier*> { value };

    // Slow path: replay the remembered digits in input order into m_buffer8 ...
    int replay = maximumDigits - 1;
    while (replay > slot) {
        record8(seenDigits[replay]);
        --replay;
    }

    // ... followed by any octal digits that are still pending.
    for (; isASCIIOctalDigit(m_current); shift())
        record8(m_current);

    if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
        return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };

    if (isASCIIDigit(m_current))
        return std::nullopt;

    return Variant<double, const Identifier*> { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
}
// Parses the integer part of a decimal literal.
// Returns the value when it was fully handled by the fast path, a BigInt digit
// identifier when BigInt is enabled and an 'n' suffix follows, and std::nullopt
// otherwise; in the latter two cases the digit characters are in m_buffer8.
template <typename T>
ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> std::optional<NumberParseResult>
{
    uint32_t value = 0;

    // The fast path is only attempted when m_buffer8 is empty; if characters
    // were already recorded, the digits are simply appended below.
    if (!m_buffer8.size()) {
        const unsigned maximumDigits = 10;

        // Raw characters are remembered (filled from the back) so they can be
        // replayed into m_buffer8 if the fast path turns out to be insufficient.
        LChar seenDigits[maximumDigits];
        int slot = maximumDigits - 1;

        do {
            seenDigits[slot] = m_current;
            value = value * 10 + (m_current - '0');
            shift();
            --slot;
        } while (isASCIIDigit(m_current) && slot >= 0);

        const bool startsFractionOrExponent = m_current == '.' || isASCIIAlphaCaselessEqual(m_current, 'e');
        if (slot >= 0 && !startsFractionOrExponent && m_current != 'n')
            return Variant<double, const Identifier*> { value };

        int replay = maximumDigits - 1;
        while (replay > slot) {
            record8(seenDigits[replay]);
            --replay;
        }
    }

    for (; isASCIIDigit(m_current); shift())
        record8(m_current);

    if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
        return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };

    return std::nullopt;
}
// Records a '.' followed by every consecutive ASCII digit at the current
// position into m_buffer8, consuming those digits from the input.
template <typename T>
ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
{
    record8('.');
    for (; isASCIIDigit(m_current); shift())
        record8(m_current);
}
// Records an exponent ("e", optional sign, digits) into m_buffer8 and consumes
// it from the input. The current character is skipped unconditionally and a
// lowercase 'e' is recorded in its place.
// Returns false when no digit follows the (optionally signed) indicator.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
{
    record8('e');
    shift();

    const bool hasSign = m_current == '+' || m_current == '-';
    if (hasSign) {
        record8(m_current);
        shift();
    }

    bool sawDigit = false;
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift();
        sawDigit = true;
    }
    return sawDigit;
}
// Consumes input up to and including the closing "*/" of a block comment.
// Sets m_terminator when a line terminator is crossed inside the comment.
// Returns true once the comment is closed, false if the input ends first.
template <typename T>
ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
{
    for (;;) {
        if (UNLIKELY(m_current == '*')) {
            shift();
            if (m_current == '/') {
                shift();
                return true;
            }
            // Not a terminator; re-examine the character after the '*'.
            continue;
        }

        if (atEnd())
            return false;

        if (!isLineTerminator(m_current)) {
            shift();
            continue;
        }

        shiftLineTerminator();
        m_terminator = true;
    }
}
// Recognizes "sourceURL=<value>" and "sourceMappingURL=<value>" at the current
// position and stores the value in the matching member. Anything else is left
// for the caller to skip as ordinary comment text.
template <typename T>
ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
{
    if (consume("source")) {
        if (consume("URL="))
            m_sourceURLDirective = parseCommentDirectiveValue();
        else if (consume("MappingURL="))
            m_sourceMappingURLDirective = parseCommentDirectiveValue();
    }
}
// Reads the value of a comment directive: a run of characters containing no
// whitespace, line terminator or quote character. The value is accepted only
// if nothing but whitespace separates it from the end of the line or input;
// otherwise a null String is returned.
template <typename T>
ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
{
    skipWhitespace();

    const T* valueBegin = currentSourcePtr();
    for (;;) {
        if (isWhiteSpace(m_current) || isLineTerminator(m_current))
            break;
        if (m_current == '"' || m_current == '\'')
            break;
        if (atEnd())
            break;
        shift();
    }
    const T* valueEnd = currentSourcePtr();

    skipWhitespace();

    const bool reachedEndOfLine = isLineTerminator(m_current) || atEnd();
    if (!reachedEndOfLine)
        return String();

    // NOTE(review): the value is copied through the 8-bit buffer; confirm how
    // append8 treats non-Latin-1 characters when T is UChar.
    append8(valueBegin, valueEnd - valueBegin);
    String directiveValue(m_buffer8.data(), m_buffer8.size());
    m_buffer8.shrink(0);
    return directiveValue;
}
// Consumes characters from the input for as long as they match the literal
// `input`. Returns true if the whole literal matched. On a mismatch the
// characters that did match have already been consumed.
template <typename T>
template <unsigned length>
ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
{
    // The array length includes the literal's terminating NUL; skip it.
    const unsigned expected = length - 1;

    unsigned matched = 0;
    while (matched < expected && m_current == input[matched]) {
        shift();
        ++matched;
    }
    return matched == expected;
}
// Looks ahead from m_code, without consuming anything, past whitespace and
// line terminators and reports whether the next character is ':'.
template <typename T>
bool Lexer<T>::nextTokenIsColon()
{
    const T* cursor = m_code;
    for (; cursor < m_codeEnd; ++cursor) {
        if (!isWhiteSpace(*cursor) && !isLineTerminator(*cursor))
            break;
    }

    if (!(cursor < m_codeEnd))
        return false;
    return *cursor == ':';
}
// Stores the end-of-token location details in `tokenRecord` and remembers
// `token` as the most recently produced token.
template <typename T>
void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
{
    m_lastToken = token;
    tokenRecord->m_endPosition = endPosition;

    JSTokenLocation& location = tokenRecord->m_location;
    location.line = lineNumber;
    location.endOffset = endOffset;
    location.lineStartOffset = lineStartOffset;
    ASSERT(location.endOffset >= location.lineStartOffset);
}
// Scans the next token from the input.
//
// tokenRecord receives the token's start/end location and, for tokens that
// carry a payload (numbers, BigInts, strings, identifiers, some punctuators),
// the payload in m_data.
// lexerFlags is tested for LexerFlagsDontBuildStrings and
// LexexFlagsDontBuildKeywords and is forwarded to parseIdentifier().
// strictMode rejects decimal literals with a leading zero and is forwarded to
// the string and identifier parsers.
//
// Returns the token type. Error tokens additionally set m_error and, in most
// cases, m_lexErrorMessage. EOFTOK is returned without calling fillTokenInfo().
template <typename T>
JSTokenType Lexer<T>::lex(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
{
JSTokenData* tokenData = &tokenRecord->m_data;
JSTokenLocation* tokenLocation = &tokenRecord->m_location;
// Remember the location currently stored in the record before it is overwritten.
m_lastTokenLocation = JSTokenLocation(tokenRecord->m_location);
ASSERT(!m_error);
ASSERT(m_buffer8.isEmpty());
ASSERT(m_buffer16.isEmpty());
JSTokenType token = ERRORTOK;
// m_terminator becomes true if a line terminator is crossed (directly or
// inside a comment) before the token produced by this call.
m_terminator = false;
// Re-entered after skipping comments and line terminators.
start:
skipWhitespace();
if (atEnd())
return EOFTOK;
tokenLocation->startOffset = currentOffset();
ASSERT(currentOffset() >= currentLineStartOffset());
tokenRecord->m_startPosition = currentPosition();
// Classify the first character: Latin-1 characters through the lookup table,
// everything else as identifier start, line terminator or invalid.
CharacterType type;
if (LIKELY(isLatin1(m_current)))
type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
else if (isNonLatin1IdentStart(m_current))
type = CharacterIdentifierStart;
else if (isLineTerminator(m_current))
type = CharacterLineTerminator;
else
type = CharacterInvalid;
switch (type) {
// '>', '>=', '>>', '>>=', '>>>', '>>>='
case CharacterGreater:
shift();
if (m_current == '>') {
shift();
if (m_current == '>') {
shift();
if (m_current == '=') {
shift();
token = URSHIFTEQUAL;
break;
}
token = URSHIFT;
break;
}
if (m_current == '=') {
shift();
token = RSHIFTEQUAL;
break;
}
token = RSHIFT;
break;
}
if (m_current == '=') {
shift();
token = GE;
break;
}
token = GT;
break;
// '=>', '=', '==', '==='
case CharacterEqual: {
if (peek(1) == '>') {
token = ARROWFUNCTION;
// Record the position of the '=' before consuming both characters.
tokenData->line = lineNumber();
tokenData->offset = currentOffset();
tokenData->lineStartOffset = currentLineStartOffset();
ASSERT(tokenData->offset >= tokenData->lineStartOffset);
shift();
shift();
break;
}
shift();
if (m_current == '=') {
shift();
if (m_current == '=') {
shift();
token = STREQ;
break;
}
token = EQEQ;
break;
}
token = EQUAL;
break;
}
// '<', '<=', '<<', '<<=' and, in classic scripts, the "<!--" comment opener.
case CharacterLess:
shift();
if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
if (m_scriptMode == JSParserScriptMode::Classic) {
// "<!--" is treated as the start of a single-line comment.
goto inSingleLineComment;
}
}
if (m_current == '<') {
shift();
if (m_current == '=') {
shift();
token = LSHIFTEQUAL;
break;
}
token = LSHIFT;
break;
}
if (m_current == '=') {
shift();
token = LE;
break;
}
token = LT;
break;
// '!', '!=', '!=='
case CharacterExclamationMark:
shift();
if (m_current == '=') {
shift();
if (m_current == '=') {
shift();
token = STRNEQ;
break;
}
token = NE;
break;
}
token = EXCLAMATION;
break;
// '+', '++', '+='
case CharacterAdd:
shift();
if (m_current == '+') {
shift();
// A distinct token type is produced when a line terminator preceded the operator.
token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
break;
}
if (m_current == '=') {
shift();
token = PLUSEQUAL;
break;
}
token = PLUS;
break;
// '-', '--', '-=' and, in classic scripts, the "-->" comment opener.
case CharacterSub:
shift();
if (m_current == '-') {
shift();
// "-->" only starts a comment at the start of a line or after a line terminator.
if ((m_atLineStart || m_terminator) && m_current == '>') {
if (m_scriptMode == JSParserScriptMode::Classic) {
shift();
goto inSingleLineComment;
}
}
// A distinct token type is produced when a line terminator preceded the operator.
token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
break;
}
if (m_current == '=') {
shift();
token = MINUSEQUAL;
break;
}
token = MINUS;
break;
// '*', '*=', '**', '**='
case CharacterMultiply:
shift();
if (m_current == '=') {
shift();
token = MULTEQUAL;
break;
}
if (m_current == '*') {
shift();
if (m_current == '=') {
shift();
token = POWEQUAL;
break;
}
token = POW;
break;
}
token = TIMES;
break;
// '/', '/=', or the start of a single-line or multi-line comment.
case CharacterSlash:
shift();
if (m_current == '/') {
shift();
goto inSingleLineCommentCheckForDirectives;
}
if (m_current == '*') {
shift();
// A closed comment yields no token; restart scanning after it.
if (parseMultilineComment())
goto start;
m_lexErrorMessage = "Multiline comment was not closed properly"_s;
token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
goto returnError;
}
if (m_current == '=') {
shift();
token = DIVEQUAL;
break;
}
token = DIVIDE;
break;
// '&', '&&', '&='
case CharacterAnd:
shift();
if (m_current == '&') {
shift();
token = AND;
break;
}
if (m_current == '=') {
shift();
token = ANDEQUAL;
break;
}
token = BITAND;
break;
// '^', '^='
case CharacterXor:
shift();
if (m_current == '=') {
shift();
token = XOREQUAL;
break;
}
token = BITXOR;
break;
// '%', '%='
case CharacterModulo:
shift();
if (m_current == '=') {
shift();
token = MODEQUAL;
break;
}
token = MOD;
break;
// '|', '|=', '||'
case CharacterOr:
shift();
if (m_current == '=') {
shift();
token = OREQUAL;
break;
}
if (m_current == '|') {
shift();
token = OR;
break;
}
token = BITOR;
break;
// '(' also records its own position in the token data.
case CharacterOpenParen:
token = OPENPAREN;
tokenData->line = lineNumber();
tokenData->offset = currentOffset();
tokenData->lineStartOffset = currentLineStartOffset();
shift();
break;
case CharacterCloseParen:
token = CLOSEPAREN;
shift();
break;
case CharacterOpenBracket:
token = OPENBRACKET;
shift();
break;
case CharacterCloseBracket:
token = CLOSEBRACKET;
shift();
break;
case CharacterComma:
token = COMMA;
shift();
break;
case CharacterColon:
token = COLON;
shift();
break;
case CharacterQuestion:
token = QUESTION;
shift();
break;
case CharacterTilde:
token = TILDE;
shift();
break;
case CharacterSemicolon:
shift();
token = SEMICOLON;
break;
// Only the backquote itself is consumed here; the template contents are
// scanned separately by scanTemplateString().
case CharacterBackQuote:
shift();
token = BACKQUOTE;
break;
// '{' and '}' also record their own position in the token data.
case CharacterOpenBrace:
tokenData->line = lineNumber();
tokenData->offset = currentOffset();
tokenData->lineStartOffset = currentLineStartOffset();
ASSERT(tokenData->offset >= tokenData->lineStartOffset);
shift();
token = OPENBRACE;
break;
case CharacterCloseBrace:
tokenData->line = lineNumber();
tokenData->offset = currentOffset();
tokenData->lineStartOffset = currentLineStartOffset();
ASSERT(tokenData->offset >= tokenData->lineStartOffset);
shift();
token = CLOSEBRACE;
break;
// '.', '...', or a numeric literal that starts with a decimal point.
case CharacterDot:
shift();
if (!isASCIIDigit(m_current)) {
if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
shift();
shift();
token = DOTDOTDOT;
break;
}
token = DOT;
break;
}
parseNumberAfterDecimalPoint();
token = DOUBLE;
if (isASCIIAlphaCaselessEqual(m_current, 'e')) {
if (!parseNumberAfterExponentIndicator()) {
m_lexErrorMessage = "Non-number found after exponent indicator"_s;
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
goto returnError;
}
}
size_t parsedLength;
tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
// NOTE(review): token is always DOUBLE at this point in this case, so the
// INTEGER check below cannot succeed here.
if (token == INTEGER)
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
if (UNLIKELY(isIdentStart(m_current))) {
m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
goto returnError;
}
m_buffer8.shrink(0);
break;
// A literal starting with '0': hex (0x), binary (0b), octal (0o), a literal
// with a leading zero, or a plain decimal handled by the fall-through below.
case CharacterZero:
shift();
if (isASCIIAlphaCaselessEqual(m_current, 'x')) {
if (!isASCIIHexDigit(peek(1))) {
m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
goto returnError;
}
// Consume the 'x' prefix character.
shift();
auto parseNumberResult = parseHex();
if (WTF::holds_alternative<double>(parseNumberResult))
tokenData->doubleValue = WTF::get<double>(parseNumberResult);
else {
token = BIGINT;
// Consume the 'n' suffix that parseHex() stopped at.
shift();
tokenData->bigIntString = WTF::get<const Identifier*>(parseNumberResult);
tokenData->radix = 16;
}
if (isIdentStart(m_current)) {
m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
goto returnError;
}
if (LIKELY(token != BIGINT))
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
m_buffer8.shrink(0);
break;
}
if (isASCIIAlphaCaselessEqual(m_current, 'b')) {
if (!isASCIIBinaryDigit(peek(1))) {
m_lexErrorMessage = "No binary digits after '0b'"_s;
token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
goto returnError;
}
// Consume the 'b' prefix character.
shift();
auto parseNumberResult = parseBinary();
// No result means a non-binary decimal digit follows; the value is set to 0
// and that digit is left in the input.
if (!parseNumberResult)
tokenData->doubleValue = 0;
else if (WTF::holds_alternative<double>(*parseNumberResult))
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
else {
token = BIGINT;
// Consume the 'n' suffix that parseBinary() stopped at.
shift();
tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
tokenData->radix = 2;
}
if (isIdentStart(m_current)) {
m_lexErrorMessage = "No space between binary literal and identifier"_s;
token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
goto returnError;
}
if (LIKELY(token != BIGINT))
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
m_buffer8.shrink(0);
break;
}
if (isASCIIAlphaCaselessEqual(m_current, 'o')) {
if (!isASCIIOctalDigit(peek(1))) {
m_lexErrorMessage = "No octal digits after '0o'"_s;
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
goto returnError;
}
// Consume the 'o' prefix character.
shift();
auto parseNumberResult = parseOctal();
// No result means an '8' or '9' follows; the value is set to 0 and that
// digit is left in the input.
if (!parseNumberResult)
tokenData->doubleValue = 0;
else if (WTF::holds_alternative<double>(*parseNumberResult))
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
else {
token = BIGINT;
// Consume the 'n' suffix that parseOctal() stopped at.
shift();
tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
tokenData->radix = 8;
}
if (isIdentStart(m_current)) {
m_lexErrorMessage = "No space between octal literal and identifier"_s;
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
goto returnError;
}
if (LIKELY(token != BIGINT))
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
m_buffer8.shrink(0);
break;
}
// No radix prefix: keep the leading '0' in the buffer so the decimal path
// below sees the complete literal.
record8('0');
if (strictMode && isASCIIDigit(m_current)) {
m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
goto returnError;
}
// A leading zero followed by octal digits is first tried as an octal number.
// If parseOctal() does not yield a double, token is left unchanged and the
// decimal path below continues from the digits already in m_buffer8.
if (isASCIIOctalDigit(m_current)) {
auto parseNumberResult = parseOctal();
if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
}
}
FALLTHROUGH;
case CharacterNumber:
// Skipped when the leading-zero octal path above already set the token to
// INTEGER or DOUBLE.
if (LIKELY(token != INTEGER && token != DOUBLE)) {
auto parseNumberResult = parseDecimal();
if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
} else {
if (parseNumberResult) {
ASSERT(WTF::get<const Identifier*>(*parseNumberResult));
token = BIGINT;
// Consume the 'n' suffix that parseDecimal() stopped at.
shift();
tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
tokenData->radix = 10;
} else {
// No fast-path value: the digits are in m_buffer8. Append an optional
// fraction and exponent, then convert the buffered text with parseDouble().
token = INTEGER;
if (m_current == '.') {
shift();
parseNumberAfterDecimalPoint();
token = DOUBLE;
}
if (isASCIIAlphaCaselessEqual(m_current, 'e')) {
if (!parseNumberAfterExponentIndicator()) {
m_lexErrorMessage = "Non-number found after exponent indicator"_s;
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
goto returnError;
}
}
size_t parsedLength;
tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
if (token == INTEGER)
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
}
}
}
if (UNLIKELY(isIdentStart(m_current))) {
m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
goto returnError;
}
m_buffer8.shrink(0);
break;
// String literal; the flag selects whether the string value is built.
case CharacterQuote: {
StringParseResult result = StringCannotBeParsed;
if (lexerFlags & LexerFlagsDontBuildStrings)
result = parseString<false>(tokenData, strictMode);
else
result = parseString<true>(tokenData, strictMode);
if (UNLIKELY(result != StringParsedSuccessfully)) {
token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
goto returnError;
}
shift();
token = STRING;
break;
}
// Identifiers and keywords. A backslash is also routed to the identifier parser.
case CharacterIdentifierStart:
ASSERT(isIdentStart(m_current));
FALLTHROUGH;
case CharacterBackSlash:
parseIdent:
if (lexerFlags & LexexFlagsDontBuildKeywords)
token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
else
token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
break;
// A line terminator produces no token: update the line state and rescan.
case CharacterLineTerminator:
ASSERT(isLineTerminator(m_current));
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
m_lineStart = m_code;
goto start;
// Accepted as an identifier start only while parsing a builtin function;
// otherwise treated as an invalid character.
case CharacterPrivateIdentifierStart:
if (m_parsingBuiltinFunction)
goto parseIdent;
FALLTHROUGH;
case CharacterInvalid:
m_lexErrorMessage = invalidCharacterMessage();
token = ERRORTOK;
goto returnError;
default:
RELEASE_ASSERT_NOT_REACHED();
m_lexErrorMessage = "Internal Error"_s;
token = ERRORTOK;
goto returnError;
}
// Every case that leaves the switch with `break` has produced a real token.
m_atLineStart = false;
goto returnToken;
inSingleLineCommentCheckForDirectives:
// A '#' or '@' followed by whitespace directly after "//" may introduce a
// sourceURL / sourceMappingURL directive.
if (UNLIKELY((m_current == '#' || m_current == '@') && isWhiteSpace(peek(1)))) {
shift();
shift();
parseCommentDirective();
}
// Control continues into the single-line comment handling below.
inSingleLineComment:
{
// Capture the position before skipping the rest of the line; it is used for
// the SEMICOLON token that may be synthesized below.
auto lineNumber = m_lineNumber;
auto endOffset = currentOffset();
auto lineStartOffset = currentLineStartOffset();
auto endPosition = currentPosition();
while (!isLineTerminator(m_current)) {
// End of input inside the comment: EOFTOK is returned without filling token info.
if (atEnd())
return EOFTOK;
shift();
}
shiftLineTerminator();
m_atLineStart = true;
m_terminator = true;
m_lineStart = m_code;
if (!lastTokenWasRestrKeyword())
goto start;
// The previous token satisfies lastTokenWasRestrKeyword() (presumably a
// restricted-production keyword such as `return`; confirm against its
// definition), so the line end is reported as a SEMICOLON token.
token = SEMICOLON;
fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
return token;
}
returnToken:
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
return token;
returnError:
m_error = true;
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
// Every token routed here must carry the error flag.
RELEASE_ASSERT(token & ErrorTokenFlag);
return token;
}
// ORs a character into an accumulator of character bits. The behaviour is
// selected by the lexer's source character type T.
template <typename T>
static inline void orCharacter(UChar&, UChar);

// LChar sources: nothing is accumulated.
template <>
inline void orCharacter<LChar>(UChar&, UChar) { }

// UChar sources: fold the character's bits into the accumulator.
template <>
inline void orCharacter<UChar>(UChar& accumulatedBits, UChar codeUnit)
{
    accumulatedBits |= codeUnit;
}
// Scans the body and flags of a regular expression literal starting at the
// current position. A non-zero `patternPrefix` is recorded as the first
// pattern character before scanning begins.
// On success stores the pattern and flags in the token data and returns REGEXP.
// If a line terminator or the end of input is reached before the closing '/',
// sets m_error and returns UNTERMINATED_REGEXP_LITERAL_ERRORTOK.
template <typename T>
JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
{
    JSTokenData* tokenData = &tokenRecord->m_data;
    ASSERT(m_buffer16.isEmpty());

    bool escaped = false;
    bool insideClass = false;
    UChar patternBits = 0;

    if (patternPrefix) {
        ASSERT(!isLineTerminator(patternPrefix));
        ASSERT(patternPrefix != '/');
        ASSERT(patternPrefix != '[');
        record16(patternPrefix);
    }

    for (;;) {
        if (isLineTerminator(m_current) || atEnd()) {
            m_buffer16.shrink(0);
            JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
            // The token info is filled before the message is built from the record.
            fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
            m_error = true;
            m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
            return token;
        }

        T consumed = m_current;
        shift();

        // An unescaped '/' outside a bracketed class ends the pattern.
        if (consumed == '/' && !escaped && !insideClass)
            break;

        record16(consumed);
        orCharacter<T>(patternBits, consumed);

        if (escaped) {
            // The character after a backslash has no structural meaning.
            escaped = false;
            continue;
        }

        if (consumed == '[')
            insideClass = true;
        else if (consumed == ']')
            insideClass = false;
        else if (consumed == '\\')
            escaped = true;
    }

    tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), patternBits);
    m_buffer16.shrink(0);

    // The flags are the run of identifier-part characters after the closing '/'.
    UChar flagBits = 0;
    for (; isIdentPart(m_current); shift()) {
        record16(m_current);
        orCharacter<T>(flagBits, m_current);
    }

    tokenData->flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), flagBits);
    m_buffer16.shrink(0);

    m_atLineStart = false;

    JSTokenType token = REGEXP;
    fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
    return token;
}
// Scans one template literal segment via parseTemplateLiteral() and fills
// `tokenRecord`. Returns TEMPLATE on success; on failure sets m_error and
// returns the unterminated or invalid template-literal error token.
template <typename T>
JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
{
    JSTokenData* tokenData = &tokenRecord->m_data;
    ASSERT(!m_error);
    ASSERT(m_buffer16.isEmpty());

    StringParseResult parseResult = parseTemplateLiteral(tokenData, rawStringsBuildMode);

    JSTokenType token = TEMPLATE;
    if (UNLIKELY(parseResult != StringParsedSuccessfully)) {
        if (parseResult == StringUnterminated)
            token = UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK;
        else
            token = INVALID_TEMPLATE_LITERAL_ERRORTOK;
        m_error = true;
    }

    m_atLineStart = false;
    fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
    return token;
}
// Resets the lexer's arena pointer and reparsing flag, and replaces each
// scratch buffer with a freshly constructed empty vector.
template <typename T>
void Lexer<T>::clear()
{
    m_arena = 0;

    // Swapping with a new vector hands the old contents to the temporary,
    // which is destroyed at the end of its scope.
    {
        Vector<LChar> emptyBuffer8;
        m_buffer8.swap(emptyBuffer8);
    }
    {
        Vector<UChar> emptyBuffer16;
        m_buffer16.swap(emptyBuffer16);
    }
    {
        Vector<UChar> emptyRawTemplateBuffer;
        m_bufferForRawTemplateString16.swap(emptyRawTemplateBuffer);
    }

    m_isReparsingFunction = false;
}
template class Lexer<LChar>;
template class Lexer<UChar>;
}