SegmentedString.h   [plain text]


/*
    Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/

#ifndef SegmentedString_h
#define SegmentedString_h

#include <wtf/Deque.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/text/TextPosition.h>
#include <wtf/text/WTFString.h>

namespace WebCore {

class SegmentedString;

class SegmentedSubstring {
public:
    SegmentedSubstring()
        : m_length(0)
        , m_doNotExcludeLineNumbers(true)
        , m_is8Bit(false)
    {
        m_data.string16Ptr = 0;
    }

    SegmentedSubstring(const String& str)
        : m_length(str.length())
        , m_doNotExcludeLineNumbers(true)
        , m_string(str)
    {
        if (m_length) {
            if (m_string.is8Bit()) {
                m_is8Bit = true;
                m_data.string8Ptr = m_string.characters8();
            } else {
                m_is8Bit = false;
                m_data.string16Ptr = m_string.characters16();
            }
        } else
            m_is8Bit = false;
    }

    void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
    
    bool is8Bit() { return m_is8Bit; }
    
    bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
    bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }

    void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }

    int numberOfCharactersConsumed() const { return m_string.length() - m_length; }

    void appendTo(StringBuilder& builder) const
    {
        int offset = m_string.length() - m_length;

        if (!offset) {
            if (m_length)
                builder.append(m_string);
        } else
            builder.append(m_string.substring(offset, m_length));
    }

    UChar getCurrentChar8()
    {
        return *m_data.string8Ptr;
    }

    UChar getCurrentChar16()
    {
        return m_data.string16Ptr ? *m_data.string16Ptr : 0;
    }

    UChar incrementAndGetCurrentChar8()
    {
        ASSERT(m_data.string8Ptr);
        return *++m_data.string8Ptr;
    }

    UChar incrementAndGetCurrentChar16()
    {
        ASSERT(m_data.string16Ptr);
        return *++m_data.string16Ptr;
    }

    String currentSubString(unsigned length)
    {
        int offset = m_string.length() - m_length;
        return m_string.substring(offset, length);
    }

    ALWAYS_INLINE UChar getCurrentChar()
    {
        ASSERT(m_length);
        if (is8Bit())
            return getCurrentChar8();
        return getCurrentChar16();
    }
    
    ALWAYS_INLINE UChar incrementAndGetCurrentChar()
    {
        ASSERT(m_length);
        if (is8Bit())
            return incrementAndGetCurrentChar8();
        return incrementAndGetCurrentChar16();
    }

public:
    union {
        const LChar* string8Ptr;
        const UChar* string16Ptr;
    } m_data;
    int m_length;

private:
    bool m_doNotExcludeLineNumbers;
    bool m_is8Bit;
    String m_string;
};

class SegmentedString {
public:
    SegmentedString()
        : m_pushedChar1(0)
        , m_pushedChar2(0)
        , m_currentChar(0)
        , m_numberOfCharactersConsumedPriorToCurrentString(0)
        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
        , m_currentLine(0)
        , m_closed(false)
        , m_empty(true)
        , m_fastPathFlags(NoFastPath)
        , m_advanceFunc(&SegmentedString::advanceEmpty)
        , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
    {
    }

    SegmentedString(const String& str)
        : m_pushedChar1(0)
        , m_pushedChar2(0)
        , m_currentString(str)
        , m_currentChar(0)
        , m_numberOfCharactersConsumedPriorToCurrentString(0)
        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
        , m_currentLine(0)
        , m_closed(false)
        , m_empty(!str.length())
        , m_fastPathFlags(NoFastPath)
    {
        if (m_currentString.m_length)
            m_currentChar = m_currentString.getCurrentChar();
        updateAdvanceFunctionPointers();
    }

    SegmentedString(const SegmentedString&);

    const SegmentedString& operator=(const SegmentedString&);

    void clear();
    void close();

    void append(const SegmentedString&);
    void prepend(const SegmentedString&);

    bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
    void setExcludeLineNumbers();

    void push(UChar c)
    {
        if (!m_pushedChar1) {
            m_pushedChar1 = c;
            m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
            updateSlowCaseFunctionPointers();
        } else {
            ASSERT(!m_pushedChar2);
            m_pushedChar2 = c;
        }
    }

    bool isEmpty() const { return m_empty; }
    unsigned length() const;

    bool isClosed() const { return m_closed; }

    enum LookAheadResult {
        DidNotMatch,
        DidMatch,
        NotEnoughCharacters,
    };

    LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
    LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }

    void advance()
    {
        if (m_fastPathFlags & Use8BitAdvance) {
            ASSERT(!m_pushedChar1);
            bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
            m_currentChar = m_currentString.incrementAndGetCurrentChar8();

            if (!haveOneCharacterLeft)
                return;

            updateSlowCaseFunctionPointers();

            return;
        }

        (this->*m_advanceFunc)();
    }

    inline void advanceAndUpdateLineNumber()
    {
        if (m_fastPathFlags & Use8BitAdvance) {
            ASSERT(!m_pushedChar1);

            bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
            bool haveOneCharacterLeft = (--m_currentString.m_length == 1);

            m_currentChar = m_currentString.incrementAndGetCurrentChar8();

            if (!(haveNewLine | haveOneCharacterLeft))
                return;

            if (haveNewLine) {
                ++m_currentLine;
                m_numberOfCharactersConsumedPriorToCurrentLine =  m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
            }

            if (haveOneCharacterLeft)
                updateSlowCaseFunctionPointers();

            return;
        }

        (this->*m_advanceAndUpdateLineNumberFunc)();
    }

    void advanceAndASSERT(UChar expectedCharacter)
    {
        ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
        advance();
    }

    void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
    {
        ASSERT_UNUSED(expectedCharacter, u_foldCase(currentChar(), U_FOLD_CASE_DEFAULT) == u_foldCase(expectedCharacter, U_FOLD_CASE_DEFAULT));
        advance();
    }

    void advancePastNonNewline()
    {
        ASSERT(currentChar() != '\n');
        advance();
    }

    void advancePastNewlineAndUpdateLineNumber()
    {
        ASSERT(currentChar() == '\n');
        if (!m_pushedChar1 && m_currentString.m_length > 1) {
            int newLineFlag = m_currentString.doNotExcludeLineNumbers();
            m_currentLine += newLineFlag;
            if (newLineFlag)
                m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
            decrementAndCheckLength();
            m_currentChar = m_currentString.incrementAndGetCurrentChar();
            return;
        }
        advanceAndUpdateLineNumberSlowCase();
    }

    // Writes the consumed characters into consumedCharacters, which must
    // have space for at least |count| characters.
    void advance(unsigned count, UChar* consumedCharacters);

    bool escaped() const { return m_pushedChar1; }

    int numberOfCharactersConsumed() const
    {
        int numberOfPushedCharacters = 0;
        if (m_pushedChar1) {
            ++numberOfPushedCharacters;
            if (m_pushedChar2)
                ++numberOfPushedCharacters;
        }
        return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
    }

    String toString() const;

    UChar currentChar() const { return m_currentChar; }    

    // The method is moderately slow, comparing to currentLine method.
    OrdinalNumber currentColumn() const;
    OrdinalNumber currentLine() const;
    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
    // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
    void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);

private:
    enum FastPathFlags {
        NoFastPath = 0,
        Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
        Use8BitAdvance = 1 << 1,
    };

    void append(const SegmentedSubstring&);
    void prepend(const SegmentedSubstring&);

    void advance8();
    void advance16();
    void advanceAndUpdateLineNumber8();
    void advanceAndUpdateLineNumber16();
    void advanceSlowCase();
    void advanceAndUpdateLineNumberSlowCase();
    void advanceEmpty();
    void advanceSubstring();
    
    void updateSlowCaseFunctionPointers();

    void decrementAndCheckLength()
    {
        ASSERT(m_currentString.m_length > 1);
        if (--m_currentString.m_length == 1)
            updateSlowCaseFunctionPointers();
    }

    void updateAdvanceFunctionPointers()
    {
        if ((m_currentString.m_length > 1) && !m_pushedChar1) {
            if (m_currentString.is8Bit()) {
                m_advanceFunc = &SegmentedString::advance8;
                m_fastPathFlags = Use8BitAdvance;
                if (m_currentString.doNotExcludeLineNumbers()) {
                    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
                    m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
                } else
                    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
                return;
            }

            m_advanceFunc = &SegmentedString::advance16;
            m_fastPathFlags = NoFastPath;
            if (m_currentString.doNotExcludeLineNumbers())
                m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
            else
                m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
            return;
        }

        if (!m_currentString.m_length && !isComposite()) {
            m_advanceFunc = &SegmentedString::advanceEmpty;
            m_fastPathFlags = NoFastPath;
            m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
        }

        updateSlowCaseFunctionPointers();
    }

    inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
    {
        if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
            String currentSubstring = m_currentString.currentSubString(string.length());
            if (currentSubstring.startsWith(string, caseSensitive))
                return DidMatch;
            return DidNotMatch;
        }
        return lookAheadSlowCase(string, caseSensitive);
    }
    
    LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
    {
        unsigned count = string.length();
        if (count > length())
            return NotEnoughCharacters;
        UChar* consumedCharacters;
        String consumedString = String::createUninitialized(count, consumedCharacters);
        advance(count, consumedCharacters);
        LookAheadResult result = DidNotMatch;
        if (consumedString.startsWith(string, caseSensitive))
            result = DidMatch;
        prepend(SegmentedString(consumedString));
        return result;
    }

    bool isComposite() const { return !m_substrings.isEmpty(); }

    UChar m_pushedChar1;
    UChar m_pushedChar2;
    SegmentedSubstring m_currentString;
    UChar m_currentChar;
    int m_numberOfCharactersConsumedPriorToCurrentString;
    int m_numberOfCharactersConsumedPriorToCurrentLine;
    int m_currentLine;
    Deque<SegmentedSubstring> m_substrings;
    bool m_closed;
    bool m_empty;
    unsigned char m_fastPathFlags;
    void (SegmentedString::*m_advanceFunc)();
    void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
};

}

#endif