WRECParser.h   [plain text]


/*
 * Copyright (C) 2008 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#ifndef Parser_h
#define Parser_h

#include <wtf/Platform.h>

#if ENABLE(WREC)

#include "Escapes.h"
#include "Quantifier.h"
#include "UString.h"
#include "WRECGenerator.h"
#include <wtf/ASCIICType.h>

namespace JSC { namespace WREC {

    struct CharacterClass;

    class Parser {
    typedef Generator::JumpList JumpList;
    typedef Generator::ParenthesesType ParenthesesType;

    friend class SavedState;

    public:
        Parser(const UString& pattern, bool ignoreCase, bool multiline)
            : m_generator(*this)
            , m_data(pattern.data())
            , m_size(pattern.size())
            , m_ignoreCase(ignoreCase)
            , m_multiline(multiline)
        {
            reset();
        }
        
        Generator& generator() { return m_generator; }

        bool ignoreCase() const { return m_ignoreCase; }
        bool multiline() const { return m_multiline; }

        void recordSubpattern() { ++m_numSubpatterns; }
        unsigned numSubpatterns() const { return m_numSubpatterns; }
        
        const char* error() const { return m_error; }
        const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; }
        
        void parsePattern(JumpList& failures)
        {
            reset();

            parseDisjunction(failures);

            if (peek() != EndOfPattern)
                setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it.
        }

        void parseDisjunction(JumpList& failures);
        void parseAlternative(JumpList& failures);
        bool parseTerm(JumpList& failures);
        bool parseNonCharacterEscape(JumpList& failures, const Escape&);
        bool parseParentheses(JumpList& failures);
        bool parseCharacterClass(JumpList& failures);
        bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert);
        bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId);

    private:
        class SavedState {
        public:
            SavedState(Parser& parser)
                : m_parser(parser)
                , m_index(parser.m_index)
            {
            }
            
            void restore()
            {
                m_parser.m_index = m_index;
            }

        private:
            Parser& m_parser;
            unsigned m_index;
        };

        void reset()
        {
            m_index = 0;
            m_numSubpatterns = 0;
            m_error = 0;
        }

        void setError(const char* error)
        {
            if (m_error)
                return;
            m_error = error;
        }

        int peek()
        {
            if (m_index >= m_size)
                return EndOfPattern;
            return m_data[m_index];
        }

        int consume()
        {
            if (m_index >= m_size)
                return EndOfPattern;
            return m_data[m_index++];
        }

        bool peekIsDigit()
        {
            return WTF::isASCIIDigit(peek());
        }

        unsigned peekDigit()
        {
            ASSERT(peekIsDigit());
            return peek() - '0';
        }

        unsigned consumeDigit()
        {
            ASSERT(peekIsDigit());
            return consume() - '0';
        }

        unsigned consumeNumber()
        {
            int n = consumeDigit();
            while (peekIsDigit()) {
                n *= 10;
                n += consumeDigit();
            }
            return n;
        }

        int consumeHex(int count)
        {
            int n = 0;
            while (count--) {
                if (!WTF::isASCIIHexDigit(peek()))
                    return -1;
                n = (n << 4) | WTF::toASCIIHexValue(consume());
            }
            return n;
        }

        unsigned consumeOctal()
        {
            unsigned n = 0;
            while (n < 32 && WTF::isASCIIOctalDigit(peek()))
                n = n * 8 + consumeDigit();
            return n;
        }
        
        ALWAYS_INLINE Quantifier consumeGreedyQuantifier();
        Quantifier consumeQuantifier();
        Escape consumeEscape(bool inCharacterClass);
        ParenthesesType consumeParenthesesType();

        static const int EndOfPattern = -1;

        // Error messages.
        static const char* QuantifierOutOfOrder;
        static const char* QuantifierWithoutAtom;
        static const char* ParenthesesUnmatched;
        static const char* ParenthesesTypeInvalid;
        static const char* ParenthesesNotSupported;
        static const char* CharacterClassUnmatched;
        static const char* CharacterClassOutOfOrder;
        static const char* EscapeUnterminated;

        Generator m_generator;
        const UChar* m_data;
        unsigned m_size;
        unsigned m_index;
        bool m_ignoreCase;
        bool m_multiline;
        unsigned m_numSubpatterns;
        const char* m_error;
    };

} } // namespace JSC::WREC

#endif // ENABLE(WREC)

#endif // Parser_h