// CharacterReferenceParserInlines.h
#pragma once
#include <wtf/text/StringBuilder.h>
namespace WebCore {
// Hands the text accumulated in |consumedCharacters| back to |source| by
// converting the builder to a string and passing it to SegmentedString::pushBack().
// Used by the character reference parser to undo a partial match.
inline void unconsumeCharacters(SegmentedString& source, StringBuilder& consumedCharacters)
{
    source.pushBack(consumedCharacters.toString());
}
// Attempts to consume a single character reference starting at the current
// character of |source| (presumably the character right after the introducing
// '&' -- confirm against callers).
//
// Recognized forms:
//   - '#' followed by decimal digits                 (Decimal state)
//   - "#x" / "#X" followed by hexadecimal digits     (Hex state)
//   - an ASCII letter, which is delegated wholesale to
//     ParserFunctions::consumeNamedEntity()           (Named state)
//
// ParserFunctions must provide:
//   - legalEntityFor(UChar32): maps the parsed number to the character to emit.
//   - acceptMalformed(): whether a numeric reference without a terminating ';'
//     is still accepted.
//   - consumeNamedEntity(...): handles named references.
//
// Parameters:
//   source                     - input stream; characters consumed by a failed
//                                numeric match are pushed back onto it.
//   decodedCharacter           - must be empty on entry; receives the decoded
//                                character on success.
//   notEnoughCharacters        - must be false on entry; set to true when
//                                |source| ran out before a decision was made.
//   additionalAllowedCharacter - 0, '"', '\'' or '>'; if non-zero and it is
//                                the first character seen, nothing is consumed.
//
// Returns true when a reference was decoded into |decodedCharacter|; for the
// Named state the result of consumeNamedEntity() is returned unchanged.
template <typename ParserFunctions>
bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCharacter, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
{
    ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
    ASSERT(!notEnoughCharacters);
    ASSERT(decodedCharacter.isEmpty());

    enum {
        Initial,
        Number,
        MaybeHexLowerCaseX,
        MaybeHexUpperCaseX,
        Hex,
        Decimal,
        Named
    } state = Initial;

    UChar32 result = 0;
    // Sticky flag: once the number exceeds UCHAR_MAX_VALUE it stays set and
    // |result| is ignored (0 is passed to legalEntityFor() instead).
    bool overflow = false;
    // Everything advanced past so far, kept so it can be pushed back on failure.
    StringBuilder consumedCharacters;

    while (!source.isEmpty()) {
        UChar character = source.currentCharacter();
        switch (state) {
        case Initial:
            // None of these can start a character reference; nothing was consumed yet.
            if (character == '\x09' || character == '\x0A' || character == '\x0C' || character == ' ' || character == '<' || character == '&')
                return false;
            if (additionalAllowedCharacter && character == additionalAllowedCharacter)
                return false;
            if (character == '#') {
                state = Number;
                break;
            }
            if (isASCIIAlpha(character)) {
                state = Named;
                goto Named;
            }
            return false;
        case Number:
            if (character == 'x') {
                state = MaybeHexLowerCaseX;
                break;
            }
            if (character == 'X') {
                state = MaybeHexUpperCaseX;
                break;
            }
            if (isASCIIDigit(character)) {
                state = Decimal;
                goto Decimal;
            }
            // Only '#' has been consumed at this point; restore it.
            source.pushBack("#"_s);
            return false;
        case MaybeHexLowerCaseX:
            if (isASCIIHexDigit(character)) {
                state = Hex;
                goto Hex;
            }
            source.pushBack("#x"_s);
            return false;
        case MaybeHexUpperCaseX:
            if (isASCIIHexDigit(character)) {
                state = Hex;
                goto Hex;
            }
            source.pushBack("#X"_s);
            return false;
        case Hex:
        Hex:
            if (isASCIIHexDigit(character)) {
                // Stop accumulating once out of range. UChar32 is signed, so
                // continuing to multiply on a long digit run would overflow
                // (undefined behavior). While |overflow| is false, |result| is
                // at most UCHAR_MAX_VALUE, so the next step cannot overflow.
                if (!overflow) {
                    result = result * 16 + toASCIIHexValue(character);
                    if (result > UCHAR_MAX_VALUE)
                        overflow = true;
                }
                break;
            }
            if (character == ';') {
                source.advancePastNonNewline();
                decodedCharacter.appendCharacter(ParserFunctions::legalEntityFor(overflow ? 0 : result));
                return true;
            }
            // Missing ';': accepted only if the policy allows it. The
            // terminating character is left in |source|.
            if (ParserFunctions::acceptMalformed()) {
                decodedCharacter.appendCharacter(ParserFunctions::legalEntityFor(overflow ? 0 : result));
                return true;
            }
            unconsumeCharacters(source, consumedCharacters);
            return false;
        case Decimal:
        Decimal:
            if (isASCIIDigit(character)) {
                // Same overflow guard as in the Hex state.
                if (!overflow) {
                    result = result * 10 + character - '0';
                    if (result > UCHAR_MAX_VALUE)
                        overflow = true;
                }
                break;
            }
            if (character == ';') {
                source.advancePastNonNewline();
                decodedCharacter.appendCharacter(ParserFunctions::legalEntityFor(overflow ? 0 : result));
                return true;
            }
            if (ParserFunctions::acceptMalformed()) {
                decodedCharacter.appendCharacter(ParserFunctions::legalEntityFor(overflow ? 0 : result));
                return true;
            }
            unconsumeCharacters(source, consumedCharacters);
            return false;
        case Named:
        Named:
            // Named references are handled entirely by the policy class.
            return ParserFunctions::consumeNamedEntity(source, decodedCharacter, notEnoughCharacters, additionalAllowedCharacter, character);
        }
        // Reached via 'break' only: record the character and move on.
        consumedCharacters.append(character);
        source.advancePastNonNewline();
    }

    // Input ended before the reference could be completed or rejected:
    // restore what was consumed and report that more input is needed.
    ASSERT(source.isEmpty());
    notEnoughCharacters = true;
    unconsumeCharacters(source, consumedCharacters);
    return false;
}
}