// MarkupTokenizerBase.h
#ifndef MarkupTokenizerBase_h
#define MarkupTokenizerBase_h
#include "SegmentedString.h"
#include <wtf/Noncopyable.h>
#include <wtf/PassOwnPtr.h>
#include <wtf/Vector.h>
#include <wtf/text/AtomicString.h>
#include <wtf/text/TextPosition.h>
namespace WebCore {
template<typename Token, typename State>
class MarkupTokenizerBase {
WTF_MAKE_NONCOPYABLE(MarkupTokenizerBase);
WTF_MAKE_FAST_ALLOCATED;
public:
virtual ~MarkupTokenizerBase() { }
OrdinalNumber lineNumber() const { return OrdinalNumber::fromZeroBasedInt(m_lineNumber); }
typename State::State state() const { return m_state; }
void setState(typename State::State state) { m_state = state; }
bool forceNullCharacterReplacement() const { return m_forceNullCharacterReplacement; }
void setForceNullCharacterReplacement(bool value) { m_forceNullCharacterReplacement = value; }
inline bool shouldSkipNullCharacters() const;
protected:
class InputStreamPreprocessor {
WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
public:
InputStreamPreprocessor(MarkupTokenizerBase<Token, State>* tokenizer)
: m_tokenizer(tokenizer)
, m_nextInputCharacter('\0')
, m_skipNextNewLine(false)
{
}
UChar nextInputCharacter() const { return m_nextInputCharacter; }
ALWAYS_INLINE bool peek(SegmentedString& source, int& lineNumber)
{
PeekAgain:
m_nextInputCharacter = *source;
static const UChar specialCharacterMask = '\n' | '\r' | '\0';
if (m_nextInputCharacter & ~specialCharacterMask) {
m_skipNextNewLine = false;
return true;
}
if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
m_skipNextNewLine = false;
source.advancePastNewline(lineNumber);
if (source.isEmpty())
return false;
m_nextInputCharacter = *source;
}
if (m_nextInputCharacter == '\r') {
m_nextInputCharacter = '\n';
m_skipNextNewLine = true;
} else {
m_skipNextNewLine = false;
if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
if (m_tokenizer->shouldSkipNullCharacters()) {
source.advancePastNonNewline();
if (source.isEmpty())
return false;
goto PeekAgain;
}
m_nextInputCharacter = 0xFFFD;
}
}
return true;
}
bool advance(SegmentedString& source, int& lineNumber)
{
source.advance(lineNumber);
if (source.isEmpty())
return false;
return peek(source, lineNumber);
}
static const UChar endOfFileMarker = 0;
private:
bool shouldTreatNullAsEndOfFileMarker(SegmentedString& source) const
{
return source.isClosed() && source.length() == 1;
}
MarkupTokenizerBase<Token, State>* m_tokenizer;
UChar m_nextInputCharacter;
bool m_skipNextNewLine;
};
MarkupTokenizerBase() : m_inputStreamPreprocessor(this) { reset(); }
inline void bufferCharacter(UChar character)
{
ASSERT(character != InputStreamPreprocessor::endOfFileMarker);
m_token->ensureIsCharacterToken();
m_token->appendToCharacter(character);
}
inline void bufferCodePoint(unsigned);
inline bool emitAndResumeIn(SegmentedString& source, typename State::State state)
{
m_state = state;
source.advance(m_lineNumber);
return true;
}
inline bool emitAndReconsumeIn(SegmentedString&, typename State::State state)
{
m_state = state;
return true;
}
inline bool emitEndOfFile(SegmentedString& source)
{
if (haveBufferedCharacterToken())
return true;
m_state = State::DataState;
source.advance(m_lineNumber);
m_token->clear();
m_token->makeEndOfFile();
return true;
}
void reset()
{
m_state = State::DataState;
m_token = 0;
m_lineNumber = 0;
}
inline bool haveBufferedCharacterToken()
{
return m_token->type() == Token::Type::Character;
}
typename State::State m_state;
Token* m_token;
int m_lineNumber;
bool m_forceNullCharacterReplacement;
UChar m_additionalAllowedCharacter;
InputStreamPreprocessor m_inputStreamPreprocessor;
};
}
#endif // MarkupTokenizerBase_h