# ud_opcode.py   [plain text]


# udis86 - scripts/ud_opcode.py
# 
# Copyright (c) 2009 Vivek Thampi
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without modification, 
# are permitted provided that the following conditions are met:
# 
#     * Redistributions of source code must retain the above copyright notice, 
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright notice, 
#       this list of conditions and the following disclaimer in the documentation 
#       and/or other materials provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

class UdOpcodeTables:
    """Opcode lookup tables built up from individual instruction definitions.

    Instructions are added with addInsnDef(). Each one is inserted into a
    tree of nested tables rooted at OpcodeTable0. Every node of the tree is
    a dict of the form { 'type', 'entries', 'meta' }; leaf nodes (type
    'insn') additionally carry 'mnemonic', 'prefixes' and 'operands'.

    NOTE: OpcodeTable0, InsnTable, MnemonicsTable and ThreeDNowTable are
    class attributes, so they are shared by every instance of this class.
    """

    # Name and size of each kind of table, keyed by table type.
    TableInfo = {
        'opctbl'    : { 'name' : 'UD_TAB__OPC_TABLE',   'size' : 256 },
        '/sse'      : { 'name' : 'UD_TAB__OPC_SSE',     'size' : 4 },
        '/reg'      : { 'name' : 'UD_TAB__OPC_REG',     'size' : 8 },
        '/rm'       : { 'name' : 'UD_TAB__OPC_RM',      'size' : 8 },
        '/mod'      : { 'name' : 'UD_TAB__OPC_MOD',     'size' : 2 },
        '/m'        : { 'name' : 'UD_TAB__OPC_MODE',    'size' : 3 },
        '/x87'      : { 'name' : 'UD_TAB__OPC_X87',     'size' : 64 },
        '/a'        : { 'name' : 'UD_TAB__OPC_ASIZE',   'size' : 3 },
        '/o'        : { 'name' : 'UD_TAB__OPC_OSIZE',   'size' : 3 },
        '/3dnow'    : { 'name' : 'UD_TAB__OPC_3DNOW',   'size' : 256 },
        'vendor'    : { 'name' : 'UD_TAB__OPC_VENDOR',  'size' : 3 },
    }

    # Root of the opcode tree; parse() hangs every instruction off this node.
    OpcodeTable0 = {
        'type'      : 'opctbl',
        'entries'   : {},
        'meta'      : 'table0'
    }

    # Symbolic extension values mapped to their two-digit table index.
    OpcExtIndex = {

        # ssef2, ssef3, sse66
        'sse': {
            'none' : '00',
            'f2'   : '01',
            'f3'   : '02',
            '66'   : '03'
        },

        # /mod=
        'mod': {
            '!11'   : '00',
            '11'    : '01'
        },

        # /m=, /o=, /a=
        'mode': {
            '16'    : '00',
            '32'    : '01',
            '64'    : '02'
        },

        'vendor' : {
            'amd'   : '00',
            'intel' : '01',
            'any'   : '02'
        }
    }

    # Linear list of instruction forms, one entry appended per parse() call.
    InsnTable = []
    # Distinct mnemonics, in the order in which they were first added.
    MnemonicsTable = []

    ThreeDNowTable = {}

    def sizeOfTable( self, t ):
        """Return the 'size' recorded in TableInfo for table type t."""
        return self.TableInfo[ t ][ 'size' ]

    def nameOfTable( self, t ):
        """Return the 'name' recorded in TableInfo for table type t."""
        return self.TableInfo[ t ][ 'name' ]

    #
    # Updates a table entry: If the entry doesn't exist
    # it will create the entry, otherwise, it will walk
    # while validating the path.
    #
    def updateTable( self, table, index, type, meta ):
        """Return the child of table at index, creating it when missing.

        table -- node whose 'entries' dict is looked up / extended
        index -- key of the child inside table['entries']
        type  -- node type the child must have (or is created with)
        meta  -- value stored under 'meta' when the child is created

        Raises NameError if a child already exists at index with a
        different type.
        """
        entries = table[ 'entries' ]
        if index not in entries:
            entries[ index ] = { 'type' : type, 'entries' : {}, 'meta' : meta }
        entry = entries[ index ]
        if entry[ 'type' ] != type:
            raise NameError( "error: violation in opcode mapping (overwrite) %s with %s." %
                                ( entry[ 'type' ], type) )
        return entry

    class Insn:
        """An abstract type representing an instruction in the opcode map.
        """

        # A mapping of opcode extensions to their representational
        # values used in the opcode map.
        OpcExtMap = {
            '/rm'    : lambda v: "%02x" % int(v, 16),
            '/x87'   : lambda v: "%02x" % int(v, 16),
            '/3dnow' : lambda v: "%02x" % int(v, 16),
            '/reg'   : lambda v: "%02x" % int(v, 16),
            # modrm.mod
            # (!11, 11)    => (00, 01)
            '/mod'   : lambda v: '00' if v == '!11' else '01',
            # Mode extensions:
            # (16, 32, 64) => (00, 01, 02)
            # Floor division keeps the operand an integer; true division
            # would produce a float, which the %x conversion rejects.
            '/o'     : lambda v: "%02x" % (int(v) // 32),
            '/a'     : lambda v: "%02x" % (int(v) // 32),
            '/m'     : lambda v: "%02x" % (int(v) // 32),
            '/sse'   : lambda v: UdOpcodeTables.OpcExtIndex['sse'][v]
        }

        def __init__(self, prefixes, mnemonic, opcodes, operands, vendor):
            """Decode an instruction definition.

            prefixes -- stored as given
            mnemonic -- stored as given
            opcodes  -- sequence of opcode strings: an optional leading
                        'ssef2' / 'ssef3' / 'sse66', the opcode bytes, then
                        zero or more 'ext=value' opcode extensions
            operands -- stored as given
            vendor   -- stored as given

            Sets self.nByteInsn (1, 2 or 3), self.opcode (the opcode that
            indexes into the opcode table) and self.opcext (extension name
            mapped to its table index).
            """
            # Work on a private copy so that stripping the sse prefix below
            # does not modify the list object owned by the caller.
            self.opcodes  = list(opcodes)
            self.prefixes = prefixes
            self.mnemonic = mnemonic
            self.operands = operands
            self.vendor   = vendor
            self.opcext   = {}

            ssePrefix = None
            if self.opcodes[0] in ('ssef2', 'ssef3', 'sse66'):
                # 'ssef2' -> 'f2', 'ssef3' -> 'f3', 'sse66' -> '66'
                ssePrefix = self.opcodes[0][3:]
                self.opcodes.pop(0)

            # do some preliminary decoding of the instruction type
            # 1byte, 2byte or 3byte instruction?
            self.nByteInsn = 1
            if self.opcodes[0] == '0f': # 2byte
                # 2+ byte opcodes are always disambiguated by an
                # sse prefix, unless it is a 3d now instruction
                # which is 0f 0f ...
                if self.opcodes[1] != '0f' and ssePrefix is None:
                    ssePrefix = 'none'
                if self.opcodes[1] in ('38', '3a'): # 3byte
                    self.nByteInsn = 3
                else:
                    self.nByteInsn = 2

            # The opcode that indexes into the opcode table.
            self.opcode = self.opcodes[self.nByteInsn - 1]

            # Record opcode extensions
            for opcode in self.opcodes[self.nByteInsn:]:
                arg, val = opcode.split('=')
                self.opcext[arg] = self.OpcExtMap[arg](val)

            # Record sse extension: the reason sse extension is handled
            # separately is that historically sse was handled as a first
            # class opcode, not as an extension. Now that sse is handled
            # as an extension, we do the manual conversion here, as opposed
            # to modifying the opcode xml file.
            if ssePrefix is not None:
                self.opcext['/sse'] = self.OpcExtMap['/sse'](ssePrefix)

    def parse(self, table, insn):
        """Insert insn into the tree rooted at table.

        Walks (and creates as needed) one level per opcode byte, per opcode
        extension and, when insn.vendor is non-empty, per vendor; then
        fills in the leaf node and records the instruction in InsnTable
        and MnemonicsTable.

        Raises NameError (from updateTable) when the path collides with an
        existing node of a different type.
        """
        index = insn.opcodes[0]
        if insn.nByteInsn > 1:
            assert index == '0f'
            table = self.updateTable(table, index, 'opctbl', '0f')
            index = insn.opcodes[1]

            if insn.nByteInsn == 3:
                table = self.updateTable(table, index, 'opctbl', index)
                index = insn.opcodes[2]

        # Walk down the tree, create levels as needed, for opcode
        # extensions. The order is important, and determines how
        # well the opcode table is packed. Also note, /sse must come
        # before /o, because /sse may consume the operand size prefix
        # and thereby affect the outcome of /o.
        for ext in ('/mod', '/x87', '/reg', '/rm', '/sse',
                    '/o',   '/a',   '/m',   '/3dnow'):
            if ext in insn.opcext:
                table = self.updateTable(table, index, ext, ext)
                index = insn.opcext[ext]

        # additional table for disambiguating vendor
        if insn.vendor:
            table = self.updateTable(table, index, 'vendor', insn.vendor)
            index = self.OpcExtIndex['vendor'][insn.vendor]

        # make leaf node entries
        leaf = self.updateTable(table, index, 'insn', '')

        leaf['mnemonic'] = insn.mnemonic
        leaf['prefixes'] = insn.prefixes
        leaf['operands'] = insn.operands

        # add instruction to linear table of instruction forms
        self.InsnTable.append({ 'prefixes' : insn.prefixes,
                                'mnemonic' : insn.mnemonic,
                                'operands' : insn.operands })

        # add mnemonic to mnemonic table
        if insn.mnemonic not in self.MnemonicsTable:
            self.MnemonicsTable.append(insn.mnemonic)


    # Adds an instruction definition to the opcode tables
    def addInsnDef( self, prefixes, mnemonic, opcodes, operands, vendor ):
        """Build an Insn from the given fields and parse it into OpcodeTable0."""
        insn = self.Insn(prefixes=prefixes,
                    mnemonic=mnemonic,
                    opcodes=opcodes,
                    operands=operands,
                    vendor=vendor)
        self.parse(self.OpcodeTable0, insn)

    def print_table( self, table, pfxs ):
        """Recursively print table as an indented tree.

        table -- node to print
        pfxs  -- string prepended to every line printed for this node

        Entries are printed in sorted index order; leaf nodes show their
        mnemonic and operands, other nodes show their type and are then
        descended into.
        """
        print("%s   |" % pfxs)
        entries = table[ 'entries' ]
        # sorted() works on a dict directly, whereas keys() only returns a
        # sortable list on Python 2.
        for idx in sorted( entries ):
            e = entries[ idx ]
            if e[ 'type' ] == 'insn':
                # One print call keeps index and mnemonic on the same line.
                print("%s   |-<%s> %s %s" % ( pfxs, idx, e[ 'mnemonic' ],
                                              ' '.join( e[ 'operands'] )))
            else:
                print("%s   |-<%s> %s" % ( pfxs, idx, e['type'] ))
                self.print_table( e, pfxs + '   |' )

    def print_tree( self ):
        """Print the whole opcode tree starting at OpcodeTable0."""
        self.print_table( self.OpcodeTable0, '' )