SourceHTMLTokenizer.re2js   [plain text]


/*
 * Copyright (C) 2009 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Generate js file as follows:
//
// re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
// | sed 's|^yy\([^:]*\)*\:|case \1:|' \
// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
// | sed 's|[*][+][+]cursor|this._charAt(++cursor)|' \
// | sed 's|[*]cursor|this._charAt(cursor)|' \
// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
// | sed 's|unsigned\ int|var|' \
// | sed 's|var\ yych|case 1: var yych|'

// Tokenizer for HTML source. Builds on WebInspector.SourceTokenizer and keeps
// two independent pieces of state: a lex condition (which re2c start condition
// the scanner is in) and a parse condition (where inside the markup the
// scanner currently is).
WebInspector.SourceHTMLTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // Start conditions referenced by the re2c rules (see re2c:condenumprefix).
    var lexConditions = {
        INITIAL: 0,
        COMMENT: 1,
        DSTRING: 2,
        SSTRING: 3
    };

    // Markup context tracked by the rule actions and the helper methods.
    var parseConditions = {
        INITIAL: 0,
        TAG: 1,
        ATTRIBUTE: 2,
        ATTRIBUTE_VALUE: 3,
        SCRIPT: 4,
        SCRIPT_ATTRIBUTE: 5,
        SCRIPT_ATTRIBUTE_VALUE: 6,
        DOCTYPE: 7
    };

    this._lexConditions = lexConditions;
    this._parseConditions = parseConditions;

    // Switch labels for the per-condition entry points of the generated
    // state machine (see re2c:condprefix = "case this.case_").
    this.case_INITIAL = 1000;
    this.case_COMMENT = 1001;
    this.case_DSTRING = 1002;
    this.case_SSTRING = 1003;

    // Condition pair describing the state before any input has been scanned.
    this.initialCondition = {
        lexCondition: lexConditions.INITIAL,
        parseCondition: parseConditions.INITIAL
    };
};

WebInspector.SourceHTMLTokenizer.prototype = {
    _isAttribute: function()
    {
        return this._parseCondition === this._parseConditions.ATTRIBUTE || this._parseCondition === this._parseConditions.SCRIPT_ATTRIBUTE;
    },

    _isAttributeValue: function()
    {
        return this._parseCondition === this._parseConditions.ATTRIBUTE_VALUE || this._parseCondition === this._parseConditions.SCRIPT_ATTRIBUTE_VALUE;
    },

    _setAttributeValue: function()
    {
        if (this._parseCondition === this._parseConditions.ATTRIBUTE)
            this._parseCondition = this._parseConditions.ATTRIBUTE_VALUE;
        else if (this._parseCondition === this._parseConditions.SCRIPT_ATTRIBUTE)
            this._parseCondition = this._parseConditions.SCRIPT_ATTRIBUTE_VALUE;
    },

    _setAttribute: function()
    {
        if (this._parseCondition === this._parseConditions.ATTRIBUTE_VALUE)
            this._parseCondition = this._parseConditions.ATTRIBUTE;
        else if (this._parseCondition === this._parseConditions.SCRIPT_ATTRIBUTE_VALUE)
            this._parseCondition = this._parseConditions.SCRIPT_ATTRIBUTE;
    },

    _stringToken: function(cursor, stringEnds)
    {
        if (this._isAttributeValue()) {
            this.tokenType = "html-attr-value";
            if (stringEnds)
                this._setAttribute();
        } else if (this._parseCondition === this._parseConditions.DOCTYPE)
            this.tokenType = "html-doctype";
        else
            this.tokenType = null;
        return cursor;
    },

    // Template for the token scanner. The re2c directive block inside the
    // switch is replaced with a generated state machine by the pipeline
    // described at the top of this file; the sed steps there turn each
    // generated "goto case N" into "{ gotoCase = N; continue; }", which is why
    // the machine is wrapped in a while/switch dispatch loop.
    //
    // Every rule action sets this.tokenType (an "html-*" name or null) and
    // returns the cursor; several also update this._parseCondition.
    //
    // cursor - position at which scanning starts (presumably an index into the
    //          current line; confirm against SourceTokenizer).
    //
    // NOTE(review): the Identifier rule marks an unquoted attribute value as
    // "html-attr-value" but does not call _setAttribute(), so the parse
    // condition appears to stay in an *_ATTRIBUTE_VALUE state until a quoted
    // string ends or GT resets it - confirm that this is intended.
    nextToken: function(cursor)
    {
        var cursorOnEnter = cursor; // Not referenced by any rule action in this template.
        var gotoCase = 1; // Matches the "case 1:" label inserted by the last sed step.
        while (1) {
            switch (gotoCase)
            // Following comment is replaced with generated state machine.
            /*!re2c
                re2c:define:YYCTYPE  = "var";
                re2c:define:YYCURSOR = cursor;
                re2c:define:YYGETCONDITION = "this.getLexCondition";
                re2c:define:YYSETCONDITION = "this.setLexCondition";
                re2c:condprefix = "case this.case_";
                re2c:condenumprefix = "this._lexConditions.";
                re2c:yyfill:enable = 0;
                re2c:labelprefix = "case ";
                re2c:indent:top = 2;
                re2c:indent:string = "    ";

                CommentContent = ([^-\r\n] | ("--" [^>]))*;
                Comment = "<!--" CommentContent "-->";
                CommentStart = "<!--" CommentContent [\r\n];
                CommentEnd = CommentContent "-->";

                DocTypeLT = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
                ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
                ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];

                LT = "<" | "</";
                GT = ">";
                EqualSign = "=";

                DoubleStringContent = [^\r\n\"]*;
                SingleStringContent = [^\r\n\']*;
                StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
                DoubleStringStart = "\"" DoubleStringContent [\r\n];
                DoubleStringEnd = DoubleStringContent "\"";
                SingleStringStart = "'" SingleStringContent [\r\n];
                SingleStringEnd = SingleStringContent "'";

                Identifier = [_a-zA-Z0-9\x80-\xFF]+;

                <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
                <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
                <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
                <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }

                <INITIAL> DocTypeLT => INITIAL
                {
                    this.tokenType = "html-doctype";
                    this._parseCondition = this._parseConditions.DOCTYPE;
                    return cursor;
                }

                <INITIAL> ScriptStart => INITIAL
                {
                    this.tokenType = "html-tag";
                    this._parseCondition = this._parseConditions.SCRIPT_ATTRIBUTE;
                    return cursor;
                }

                <INITIAL> ScriptEnd => INITIAL
                {
                    this.tokenType = "html-tag";
                    this._parseCondition = this._parseConditions.INITIAL;
                    return cursor;
                }

                <INITIAL> LT => INITIAL
                {
                    if (this._parseCondition === this._parseConditions.SCRIPT) {
                        this.tokenType = null;
                        return cursor;
                    }

                    this.tokenType = "html-tag";
                    this._parseCondition = this._parseConditions.TAG;
                    return cursor;
                }

                <INITIAL> GT => INITIAL
                {
                    if (this._parseCondition === this._parseConditions.SCRIPT) {
                        this.tokenType = null;
                        return cursor;
                    }

                    if (this._parseCondition === this._parseConditions.DOCTYPE)
                        this.tokenType = "html-doctype";
                    else
                        this.tokenType = "html-tag";

                    if (this._parseCondition === this._parseConditions.SCRIPT_ATTRIBUTE)
                        this._parseCondition = this._parseConditions.SCRIPT;
                    else
                        this._parseCondition = this._parseConditions.INITIAL;
                    return cursor;
                }

                <INITIAL> StringLiteral { return this._stringToken(cursor, true); }
                <INITIAL> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
                <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
                <DSTRING> DoubleStringEnd => INITIAL { return this._stringToken(cursor, true); }
                <INITIAL> SingleStringStart => SSTRING { return this._stringToken(cursor); }
                <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
                <SSTRING> SingleStringEnd => INITIAL { return this._stringToken(cursor, true); }

                <INITIAL> EqualSign => INITIAL
                {
                    if (this._isAttribute()) {
                        this.tokenType = null;
                        this._setAttributeValue();
                    } else if (this._parseCondition === this._parseConditions.DOCTYPE)
                        this.tokenType = "html-doctype";
                    else
                        this.tokenType = null;
                    return cursor;
                }

                <INITIAL> Identifier
                {
                    if (this._parseCondition === this._parseConditions.SCRIPT) {
                        this.tokenType = null;
                        return cursor;
                    }

                    if (this._parseCondition === this._parseConditions.TAG) {
                        this.tokenType = "html-tag";
                        this._parseCondition = this._parseConditions.ATTRIBUTE;
                    } else if (this._isAttribute())
                        this.tokenType = "html-attr-name";
                    else if (this._isAttributeValue())
                        this.tokenType = "html-attr-value";
                    else if (this._parseCondition === this._parseConditions.DOCTYPE)
                        this.tokenType = "html-doctype";
                    else
                        this.tokenType = null;
                    return cursor;
                }
                <*> [^] { this.tokenType = null; return cursor; }
            */
        }
    }
}

// Chain the prototypes so that lookups missing on SourceHTMLTokenizer.prototype
// fall through to WebInspector.SourceTokenizer.prototype.
WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;