# parser.rb


# Copyright (C) 2011 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.

require "ast"
require "instructions"
require "pathname"
require "registers"
require "self_hash"

# Records where a piece of source came from: a file name and a line number.
class CodeOrigin
    attr_reader :fileName
    attr_reader :lineNumber

    # Both values are stored exactly as given.
    def initialize(fileName, lineNumber)
        @fileName, @lineNumber = fileName, lineNumber
    end

    # Renders as "<fileName>:<lineNumber>".
    def to_s
        location = "#{fileName}:#{lineNumber}"
        location
    end
end

# A single lexed token: its text (string) and the origin it was read from.
class Token
    attr_reader :codeOrigin, :string

    def initialize(codeOrigin, string)
        @codeOrigin, @string = codeOrigin, string
    end

    # Compares by text. The right-hand side may be another Token (its text
    # is used) or any other value (compared against the text directly).
    def ==(other)
        rhs = other.is_a?(Token) ? other.string : other
        @string == rhs
    end

    # Matches the token's text against the given pattern.
    def =~(other)
        @string =~ other
    end

    # Quoted token text followed by its origin.
    def to_s
        "#{@string.inspect} at #{codeOrigin}"
    end

    # Always raises. The message names this token, and the first extra
    # argument (if any) is appended as an explanation.
    def parseError(*comment)
        raise "Parse error: #{to_s}" if comment.empty?
        raise "Parse error: #{to_s}: #{comment[0]}"
    end
end

#
# The lexer. Takes a string and returns an array of tokens.
#

# Splits str into an array of Tokens. fileName is wrapped in a Pathname and
# recorded, together with the current line number, in each token's
# CodeOrigin. Comments and runs of spaces/tabs produce no token; newlines do.
# Hex and octal literals are emitted as their decimal text. Raises on any
# character sequence that no pattern recognises.
def lex(str, fileName)
    fileName = Pathname.new(fileName)
    tokens = []
    line = 1
    until str.empty?
        # The case expression yields the token text, or nil when the matched
        # input is to be skipped. Patterns are tried in the order written.
        text =
            case str
            when /\A\#([^\n]*)/
                nil # comment
            when /\A\n/,
                 /\A[a-zA-Z]([a-zA-Z0-9_]*)/,
                 /\A\.([a-zA-Z0-9_]*)/,
                 /\A_([a-zA-Z0-9_]*)/
                $&
            when /\A([ \t]+)/
                nil # whitespace
            when /\A0x([0-9a-fA-F]+)/
                $&.hex.to_s
            when /\A0([0-7]+)/
                $&.oct.to_s
            when /\A([0-9]+)/,
                 /\A::/,
                 /\A[:,\(\)\[\]=\+\-~\|&^*]/
                $&
            else
                raise "Lexer error at #{CodeOrigin.new(fileName, line).to_s}, unexpected sequence #{str[0..20].inspect}"
            end
        # Grab the unconsumed tail before anything else can touch $~.
        remainder = $~.post_match
        unless text.nil?
            tokens << Token.new(CodeOrigin.new(fileName, line), text)
            # A newline token belongs to the line it terminates.
            line += 1 if text == "\n"
        end
        str = remainder
    end
    tokens
end

#
# Token identification.
#

# Matches token against REGISTER_PATTERN and returns the match result
# (nil when it does not match).
def isRegister(token)
    return token =~ REGISTER_PATTERN
end

# Matches token against INSTRUCTION_PATTERN and returns the match result
# (nil when it does not match).
def isInstruction(token)
    return token =~ INSTRUCTION_PATTERN
end

# Truthy when token is one of the language's reserved words, or matches
# REGISTER_PATTERN or INSTRUCTION_PATTERN; nil otherwise.
def isKeyword(token)
    reservedWord = token =~ /\A((true)|(false)|(if)|(then)|(else)|(elsif)|(end)|(and)|(or)|(not)|(macro)|(const)|(sizeof)|(error)|(include))\Z/
    reservedWord || (token =~ REGISTER_PATTERN) || (token =~ INSTRUCTION_PATTERN)
end

# An identifier starts with a letter, continues with letters, digits or
# underscores, and is not a keyword. Returns nil when the shape is wrong,
# otherwise true/false according to the keyword check.
def isIdentifier(token)
    wellFormed = token =~ /\A[a-zA-Z]([a-zA-Z0-9_]*)\Z/
    wellFormed && !isKeyword(token)
end

# A label is an underscore followed by any number of letters, digits or
# underscores. Returns the match result (nil when it does not match).
def isLabel(token)
    return token =~ /\A_([a-zA-Z0-9_]*)\Z/
end

# A local label is a dot followed by any number of letters, digits or
# underscores. Returns the match result (nil when it does not match).
def isLocalLabel(token)
    return token =~ /\A\.([a-zA-Z0-9_]*)\Z/
end

# A variable is anything accepted by isIdentifier or, failing that, by
# isRegister. Returns whichever check's result is truthy first.
def isVariable(token)
    isIdentifier(token) || isRegister(token)
end

# Only the first character is inspected: a token counts as an integer when
# it begins with a decimal digit. Returns the match result (nil otherwise).
def isInteger(token)
    leadingDigit = token =~ /\A[0-9]/
    leadingDigit
end

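#
# The parser. Takes source text and a file name, lexes the text into
# tokens, and builds an AST from them. Callers outside this file should go
# through parseData(data, fileName) or parse(fileName); the Parser methods
# themselves are not for public consumption.
#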

#
# Recursive-descent parser. The constructor lexes its input; @idx is the
# index of the next unconsumed token, and @tokens[@idx] is nil once every
# token has been consumed.
#
class Parser
    # Lexes data (attributing tokens to fileName) and positions the parser
    # at the first token.
    def initialize(data, fileName)
        @tokens = lex(data, fileName)
        @idx = 0
    end
    
    # Raises a parse error located at the current token, or an "end of file"
    # error when no tokens remain. The optional first argument is appended
    # to the message.
    def parseError(*comment)
        if @tokens[@idx]
            @tokens[@idx].parseError(*comment)
        else
            if comment.empty?
                raise "Parse error at end of file"
            else
                raise "Parse error at end of file: #{comment[0]}"
            end
        end
    end
    
    # Checks the current position and then advances by one. With a regexp,
    # the current token must match it; with nil, the parser must already be
    # at the end of the token array.
    def consume(regexp)
        if regexp
            parseError unless @tokens[@idx] =~ regexp
        else
            parseError unless @idx == @tokens.length
        end
        @idx += 1
    end
    
    # Advances past any run of newline tokens.
    def skipNewLine
        while @tokens[@idx] == "\n"
            @idx += 1
        end
    end
    
    # Parses one predicate atom: "not <atom>", a parenthesised predicate,
    # "true", "false", or a setting name.
    def parsePredicateAtom
        if @tokens[@idx] == "not"
            # NOTE(review): the "not" token is consumed but the operand is
            # returned un-negated, so "not a" currently parses to the same
            # node as "a". Confirm whether a negation node should be built
            # here.
            @idx += 1
            parsePredicateAtom
        elsif @tokens[@idx] == "("
            @idx += 1
            skipNewLine
            result = parsePredicate
            parseError unless @tokens[@idx] == ")"
            @idx += 1
            result
        elsif @tokens[@idx] == "true"
            result = True.instance
            @idx += 1
            result
        elsif @tokens[@idx] == "false"
            result = False.instance
            @idx += 1
            result
        elsif isIdentifier @tokens[@idx]
            result = Setting.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
            @idx += 1
            result
        else
            parseError
        end
    end
    
    # Parses a left-associative chain of atoms joined by "and".
    def parsePredicateAnd
        result = parsePredicateAtom
        while @tokens[@idx] == "and"
            codeOrigin = @tokens[@idx].codeOrigin
            @idx += 1
            skipNewLine
            right = parsePredicateAtom
            result = And.new(codeOrigin, result, right)
        end
        result
    end
    
    # Parses a left-associative chain of "and"-groups joined by "or".
    def parsePredicate
        # some examples of precedence:
        # not a and b -> (not a) and b
        # a and b or c -> (a and b) or c
        # a or b and c -> a or (b and c)
        
        result = parsePredicateAnd
        while @tokens[@idx] == "or"
            codeOrigin = @tokens[@idx].codeOrigin
            @idx += 1
            skipNewLine
            right = parsePredicateAnd
            result = Or.new(codeOrigin, result, right)
        end
        result
    end
    
    # Parses a register (floating-point registers are told apart by
    # FPR_PATTERN) or an identifier, and consumes it.
    def parseVariable
        if isRegister(@tokens[@idx])
            if @tokens[@idx] =~ FPR_PATTERN
                result = FPRegisterID.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
            else
                result = RegisterID.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
            end
        elsif isIdentifier(@tokens[@idx])
            result = Variable.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
        else
            parseError
        end
        @idx += 1
        result
    end
    
    # Parses a bracketed address whose offset expression has already been
    # parsed by the caller. The current token must be "[".
    def parseAddress(offset)
        parseError unless @tokens[@idx] == "["
        codeOrigin = @tokens[@idx].codeOrigin
        
        # Four possibilities:
        # []       -> AbsoluteAddress
        # [a]      -> Address
        # [a,b]    -> BaseIndex with scale = 1
        # [a,b,c]  -> BaseIndex
        
        @idx += 1
        if @tokens[@idx] == "]"
            @idx += 1
            return AbsoluteAddress.new(codeOrigin, offset)
        end
        a = parseVariable
        if @tokens[@idx] == "]"
            result = Address.new(codeOrigin, a, offset)
        else
            parseError unless @tokens[@idx] == ","
            @idx += 1
            b = parseVariable
            if @tokens[@idx] == "]"
                result = BaseIndex.new(codeOrigin, a, b, 1, offset)
            else
                parseError unless @tokens[@idx] == ","
                @idx += 1
                # Check that a token exists before reading its text, so
                # that running out of input is reported as a parse error.
                parseError unless @tokens[@idx] and ["1", "2", "4", "8"].member?(@tokens[@idx].string)
                c = @tokens[@idx].string.to_i
                @idx += 1
                parseError unless @tokens[@idx] == "]"
                result = BaseIndex.new(codeOrigin, a, b, c, offset)
            end
        end
        @idx += 1
        result
    end
    
    # Parses "name" or "name::name::...". Returns a two-element array: the
    # code origin of the first name and the array of name strings.
    def parseColonColon
        skipNewLine
        # Validate before touching the token: it is nil at end of input.
        parseError unless isIdentifier @tokens[@idx]
        codeOrigin = @tokens[@idx].codeOrigin
        names = [@tokens[@idx].string]
        @idx += 1
        while @tokens[@idx] == "::"
            @idx += 1
            parseError unless isIdentifier @tokens[@idx]
            names << @tokens[@idx].string
            @idx += 1
        end
        [codeOrigin, names]
    end
    
    # Parses the tightest-binding expression forms: unary "-" and "~",
    # parenthesised expressions, integers, (possibly ::-qualified) names,
    # registers, and "sizeof <name>".
    def parseExpressionAtom
        skipNewLine
        if @tokens[@idx] == "-"
            @idx += 1
            NegImmediate.new(@tokens[@idx - 1].codeOrigin, parseExpressionAtom)
        elsif @tokens[@idx] == "~"
            @idx += 1
            BitnotImmediate.new(@tokens[@idx - 1].codeOrigin, parseExpressionAtom)
        elsif @tokens[@idx] == "("
            @idx += 1
            result = parseExpression
            parseError unless @tokens[@idx] == ")"
            @idx += 1
            result
        elsif isInteger @tokens[@idx]
            result = Immediate.new(@tokens[@idx].codeOrigin, @tokens[@idx].string.to_i)
            @idx += 1
            result
        elsif isIdentifier @tokens[@idx]
            codeOrigin, names = parseColonColon
            if names.size > 1
                # The last component is the field; everything before it
                # names the struct.
                StructOffset.forField(codeOrigin, names[0..-2].join('::'), names[-1])
            else
                Variable.forName(codeOrigin, names[0])
            end
        elsif isRegister @tokens[@idx]
            parseVariable
        elsif @tokens[@idx] == "sizeof"
            @idx += 1
            codeOrigin, names = parseColonColon
            Sizeof.forName(codeOrigin, names.join('::'))
        else
            parseError
        end
    end
    
    # Parses a left-associative chain of atoms joined by "*".
    def parseExpressionMul
        skipNewLine
        result = parseExpressionAtom
        while @tokens[@idx] == "*"
            @idx += 1
            result = MulImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionAtom)
        end
        result
    end
    
    # Whether the current token can begin an expression.
    def couldBeExpression
        @tokens[@idx] == "-" or @tokens[@idx] == "~" or @tokens[@idx] == "sizeof" or isInteger(@tokens[@idx]) or isVariable(@tokens[@idx]) or @tokens[@idx] == "("
    end
    
    # Parses a left-associative chain of products joined by "+" or "-".
    def parseExpressionAdd
        skipNewLine
        result = parseExpressionMul
        while @tokens[@idx] == "+" or @tokens[@idx] == "-"
            if @tokens[@idx] == "+"
                @idx += 1
                result = AddImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionMul)
            else
                @idx += 1
                result = SubImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionMul)
            end
        end
        result
    end
    
    # Parses a left-associative chain of sums joined by "&".
    def parseExpressionAnd
        skipNewLine
        result = parseExpressionAdd
        while @tokens[@idx] == "&"
            @idx += 1
            result = AndImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionAdd)
        end
        result
    end
    
    # Parses a full expression: "&"-groups joined by "|" or "^", which bind
    # loosest of all the expression operators.
    def parseExpression
        skipNewLine
        result = parseExpressionAnd
        while @tokens[@idx] == "|" or @tokens[@idx] == "^"
            if @tokens[@idx] == "|"
                @idx += 1
                result = OrImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionAnd)
            else
                @idx += 1
                result = XorImmediates.new(@tokens[@idx - 1].codeOrigin, result, parseExpressionAnd)
            end
        end
        result
    end
    
    # Parses one operand: an expression, an expression followed by a
    # bracketed address (the expression becomes the offset), a bare address
    # (offset 0), a label reference, or a local label reference. comment is
    # included in the error raised when none of these apply.
    def parseOperand(comment)
        skipNewLine
        if couldBeExpression
            expr = parseExpression
            if @tokens[@idx] == "["
                parseAddress(expr)
            else
                expr
            end
        elsif @tokens[@idx] == "["
            parseAddress(Immediate.new(@tokens[@idx].codeOrigin, 0))
        elsif isLabel @tokens[@idx]
            result = LabelReference.new(@tokens[@idx].codeOrigin, Label.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string))
            @idx += 1
            result
        elsif isLocalLabel @tokens[@idx]
            result = LocalLabelReference.new(@tokens[@idx].codeOrigin, LocalLabel.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string))
            @idx += 1
            result
        else
            parseError(comment)
        end
    end
    
    # Parses a parenthesised, comma-separated list of identifiers (newlines
    # are allowed between elements) and returns them as Variables.
    def parseMacroVariables
        skipNewLine
        consume(/\A\(\Z/)
        variables = []
        loop {
            skipNewLine
            if @tokens[@idx] == ")"
                @idx += 1
                break
            elsif isIdentifier(@tokens[@idx])
                variables << Variable.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
                @idx += 1
                skipNewLine
                if @tokens[@idx] == ")"
                    @idx += 1
                    break
                elsif @tokens[@idx] == ","
                    @idx += 1
                else
                    parseError
                end
            else
                parseError
            end
        }
        variables
    end
    
    # Parses statements into a Sequence. When final is nil the sequence runs
    # to the end of the tokens; otherwise it stops at (without consuming)
    # the first statement-position token matching the final regexp. comment
    # is appended to the error raised for an unrecognised statement.
    def parseSequence(final, comment)
        # There may be no current token (empty input, or a nested sequence
        # that starts at end of input); the sequence then has no origin.
        firstCodeOrigin = @tokens[@idx] ? @tokens[@idx].codeOrigin : nil
        list = []
        loop {
            if (@idx == @tokens.length and not final) or (final and @tokens[@idx] =~ final)
                break
            elsif @tokens[@idx] == "\n"
                # ignore
                @idx += 1
            elsif @tokens[@idx] == "const"
                # Attribute the declaration to its "const" keyword. The
                # token after the operand may not exist when the
                # declaration is the last thing in the input.
                codeOrigin = @tokens[@idx].codeOrigin
                @idx += 1
                parseError unless isVariable @tokens[@idx]
                variable = Variable.forName(@tokens[@idx].codeOrigin, @tokens[@idx].string)
                @idx += 1
                parseError unless @tokens[@idx] == "="
                @idx += 1
                value = parseOperand("while inside of const #{variable.name}")
                list << ConstDecl.new(codeOrigin, variable, value)
            elsif @tokens[@idx] == "error"
                list << Error.new(@tokens[@idx].codeOrigin)
                @idx += 1
            elsif @tokens[@idx] == "if"
                codeOrigin = @tokens[@idx].codeOrigin
                @idx += 1
                skipNewLine
                predicate = parsePredicate
                consume(/\A((then)|(\n))\Z/)
                skipNewLine
                ifThenElse = IfThenElse.new(codeOrigin, predicate, parseSequence(/\A((else)|(end)|(elsif))\Z/, "while inside of \"if #{predicate.dump}\""))
                list << ifThenElse
                # Each elsif becomes the else case of the previous
                # IfThenElse, forming a chain.
                while @tokens[@idx] == "elsif"
                    codeOrigin = @tokens[@idx].codeOrigin
                    @idx += 1
                    skipNewLine
                    predicate = parsePredicate
                    consume(/\A((then)|(\n))\Z/)
                    skipNewLine
                    elseCase = IfThenElse.new(codeOrigin, predicate, parseSequence(/\A((else)|(end)|(elsif))\Z/, "while inside of \"if #{predicate.dump}\""))
                    ifThenElse.elseCase = elseCase
                    ifThenElse = elseCase
                end
                if @tokens[@idx] == "else"
                    @idx += 1
                    ifThenElse.elseCase = parseSequence(/\Aend\Z/, "while inside of else case for \"if #{predicate.dump}\"")
                    # Step over the "end" that terminated the else body.
                    @idx += 1
                else
                    parseError unless @tokens[@idx] == "end"
                    @idx += 1
                end
            elsif @tokens[@idx] == "macro"
                codeOrigin = @tokens[@idx].codeOrigin
                @idx += 1
                skipNewLine
                parseError unless isIdentifier(@tokens[@idx])
                name = @tokens[@idx].string
                @idx += 1
                variables = parseMacroVariables
                body = parseSequence(/\Aend\Z/, "while inside of macro #{name}")
                # Step over the "end" that terminated the macro body.
                @idx += 1
                list << Macro.new(codeOrigin, name, variables, body)
            elsif isInstruction @tokens[@idx]
                codeOrigin = @tokens[@idx].codeOrigin
                name = @tokens[@idx].string
                @idx += 1
                if (not final and @idx == @tokens.size) or (final and @tokens[@idx] =~ final)
                    # Zero operand instruction, and it's the last one.
                    list << Instruction.new(codeOrigin, name, [])
                    break
                elsif @tokens[@idx] == "\n"
                    # Zero operand instruction.
                    list << Instruction.new(codeOrigin, name, [])
                    @idx += 1
                else
                    # It's definitely an instruction, and it has at least one operand.
                    operands = []
                    endOfSequence = false
                    loop {
                        operands << parseOperand("while inside of instruction #{name}")
                        if (not final and @idx == @tokens.size) or (final and @tokens[@idx] =~ final)
                            # The end of the instruction and of the sequence.
                            endOfSequence = true
                            break
                        elsif @tokens[@idx] == ","
                            # Has another operand.
                            @idx += 1
                        elsif @tokens[@idx] == "\n"
                            # The end of the instruction.
                            @idx += 1
                            break
                        else
                            parseError("Expected a comma, newline, or #{final} after #{operands.last.dump}")
                        end
                    }
                    list << Instruction.new(codeOrigin, name, operands)
                    if endOfSequence
                        break
                    end
                end
            elsif isIdentifier @tokens[@idx]
                codeOrigin = @tokens[@idx].codeOrigin
                name = @tokens[@idx].string
                @idx += 1
                if @tokens[@idx] == "("
                    # Macro invocation.
                    @idx += 1
                    operands = []
                    skipNewLine
                    if @tokens[@idx] == ")"
                        @idx += 1
                    else
                        loop {
                            skipNewLine
                            if @tokens[@idx] == "macro"
                                # It's a macro lambda!
                                codeOriginInner = @tokens[@idx].codeOrigin
                                @idx += 1
                                variables = parseMacroVariables
                                body = parseSequence(/\Aend\Z/, "while inside of anonymous macro passed as argument to #{name}")
                                @idx += 1
                                operands << Macro.new(codeOriginInner, nil, variables, body)
                            else
                                operands << parseOperand("while inside of macro call to #{name}")
                            end
                            skipNewLine
                            if @tokens[@idx] == ")"
                                @idx += 1
                                break
                            elsif @tokens[@idx] == ","
                                @idx += 1
                            else
                                # There may be no token left to quote.
                                unexpected = @tokens[@idx] ? @tokens[@idx].string.inspect : "end of file"
                                parseError "Unexpected #{unexpected} while parsing invocation of macro #{name}"
                            end
                        }
                    end
                    list << MacroCall.new(codeOrigin, name, operands)
                else
                    parseError "Expected \"(\" after #{name}"
                end
            elsif isLabel @tokens[@idx] or isLocalLabel @tokens[@idx]
                codeOrigin = @tokens[@idx].codeOrigin
                name = @tokens[@idx].string
                @idx += 1
                parseError unless @tokens[@idx] == ":"
                # It's a label.
                if isLabel name
                    list << Label.forName(codeOrigin, name)
                else
                    list << LocalLabel.forName(codeOrigin, name)
                end
                @idx += 1
            elsif @tokens[@idx] == "include"
                @idx += 1
                parseError unless isIdentifier(@tokens[@idx])
                moduleName = @tokens[@idx].string
                # The included file is "<moduleName>.asm" in the directory
                # of the file containing the include statement.
                fileName = @tokens[@idx].codeOrigin.fileName.dirname + (moduleName + ".asm")
                @idx += 1
                $stderr.puts "offlineasm: Including file #{fileName}"
                list << parse(fileName)
            else
                parseError "Expecting terminal #{final} #{comment}"
            end
        }
        Sequence.new(firstCodeOrigin, list)
    end
end

# Parses source text held in memory. fileName is passed to the Parser along
# with the text. Returns whatever Parser#parseSequence(nil, "") returns for
# the whole input.
def parseData(data, fileName)
    Parser.new(data, fileName).parseSequence(nil, "")
end

# Reads the file at fileName and parses its contents with parseData.
#
# File.read is used rather than IO.read so that fileName is always treated
# as a path: IO.read would run a name beginning with "|" as a command.
def parse(fileName)
    parseData(File.read(fileName), fileName)
end

# Returns dirHash applied to the directory containing fileName, restricted
# by the /\.asm$/ pattern.
def parseHash(fileName)
    sourceDir = Pathname.new(fileName).dirname
    dirHash(sourceDir, /\.asm$/)
end