# ud_itab.py   [plain text]


# udis86 - scripts/ud_itab.py
# 
# Copyright (c) 2009, 2013 Vivek Thampi
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without modification, 
# are permitted provided that the following conditions are met:
# 
#     * Redistributions of source code must retain the above copyright notice, 
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright notice, 
#       this list of conditions and the following disclaimer in the documentation 
#       and/or other materials provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import sys
from ud_opcode import UdOpcodeTable, UdOpcodeTables, UdInsnDef

class UdItabGenerator:
    """Emit the udis86 instruction tables as C source and header files.

    The generator is constructed from an opcode-tables object and writes
    two files via genItab(): "udis86_itab.c" (opcode lookup tables, the
    table lookup index, operand macros, the instruction table and the
    mnemonic strings) and "udis86_itab.h" (table-type and mnemonic
    enumerations).
    """

    # Maps an operand name used in instruction definitions to a pair of C
    # identifiers [operand type, operand size].  genItabC() emits one
    # "#define O_<name> { <type>, <size> }" macro per entry, and
    # genInsnTable() validates instruction operands against the keys.
    OperandDict = {
        "Av"       : [    "OP_A"        , "SZ_V"     ],
        "E"        : [    "OP_E"        , "SZ_NA"    ],
        "Eb"       : [    "OP_E"        , "SZ_B"     ],
        "Ew"       : [    "OP_E"        , "SZ_W"     ],
        "Ev"       : [    "OP_E"        , "SZ_V"     ],
        "Ed"       : [    "OP_E"        , "SZ_D"     ],
        "Ey"       : [    "OP_E"        , "SZ_Y"     ],
        "Eq"       : [    "OP_E"        , "SZ_Q"     ],
        "Ez"       : [    "OP_E"        , "SZ_Z"     ],
        "Fv"       : [    "OP_F"        , "SZ_V"     ],
        "G"        : [    "OP_G"        , "SZ_NA"    ],
        "Gb"       : [    "OP_G"        , "SZ_B"     ],
        "Gw"       : [    "OP_G"        , "SZ_W"     ],
        "Gv"       : [    "OP_G"        , "SZ_V"     ],
        "Gy"       : [    "OP_G"        , "SZ_Y"     ],
        "Gd"       : [    "OP_G"        , "SZ_D"     ],
        "Gq"       : [    "OP_G"        , "SZ_Q"     ],
        "Gz"       : [    "OP_G"        , "SZ_Z"     ],
        "M"        : [    "OP_M"        , "SZ_NA"    ],
        "Mb"       : [    "OP_M"        , "SZ_B"     ],
        "Mw"       : [    "OP_M"        , "SZ_W"     ],
        "Ms"       : [    "OP_M"        , "SZ_W"     ],
        "Md"       : [    "OP_M"        , "SZ_D"     ],
        "Mq"       : [    "OP_M"        , "SZ_Q"     ],
        "Mdq"      : [    "OP_M"        , "SZ_DQ"    ],
        "Mv"       : [    "OP_M"        , "SZ_V"     ],
        "Mt"       : [    "OP_M"        , "SZ_T"     ],
        "Mo"       : [    "OP_M"        , "SZ_O"     ],
        "MbRd"     : [    "OP_MR"       , "SZ_BD"    ],
        "MbRv"     : [    "OP_MR"       , "SZ_BV"    ],
        "MwRv"     : [    "OP_MR"       , "SZ_WV"    ],
        "MwRd"     : [    "OP_MR"       , "SZ_WD"    ],
        "MwRy"     : [    "OP_MR"       , "SZ_WY"    ],
        "MdRy"     : [    "OP_MR"       , "SZ_DY"    ],
        "I1"       : [    "OP_I1"       , "SZ_NA"    ],
        "I3"       : [    "OP_I3"       , "SZ_NA"    ],
        "Ib"       : [    "OP_I"        , "SZ_B"     ],
        "Iw"       : [    "OP_I"        , "SZ_W"     ],
        "Iv"       : [    "OP_I"        , "SZ_V"     ],
        "Iz"       : [    "OP_I"        , "SZ_Z"     ],
        "sIb"      : [    "OP_sI"       , "SZ_B"     ],
        "sIz"      : [    "OP_sI"       , "SZ_Z"     ],
        "sIv"      : [    "OP_sI"       , "SZ_V"     ],
        "Jv"       : [    "OP_J"        , "SZ_V"     ],
        "Jz"       : [    "OP_J"        , "SZ_Z"     ],
        "Jb"       : [    "OP_J"        , "SZ_B"     ],
        "R"        : [    "OP_R"        , "SZ_RDQ"   ],
        "C"        : [    "OP_C"        , "SZ_NA"    ],
        "D"        : [    "OP_D"        , "SZ_NA"    ],
        "S"        : [    "OP_S"        , "SZ_W"     ],
        "Ob"       : [    "OP_O"        , "SZ_B"     ],
        "Ow"       : [    "OP_O"        , "SZ_W"     ],
        "Ov"       : [    "OP_O"        , "SZ_V"     ],
        "U"        : [    "OP_U"        , "SZ_O"     ],
        "Ux"       : [    "OP_U"        , "SZ_X"     ],
        "V"        : [    "OP_V"        , "SZ_DQ"    ],
        "Vdq"      : [    "OP_V"        , "SZ_DQ"    ],
        "Vqq"      : [    "OP_V"        , "SZ_QQ"    ],
        "Vsd"      : [    "OP_V"        , "SZ_Q"     ],
        "Vx"       : [    "OP_V"        , "SZ_X"     ],
        "H"        : [    "OP_H"        , "SZ_X"     ],
        "Hx"       : [    "OP_H"        , "SZ_X"     ],
        "Hqq"      : [    "OP_H"        , "SZ_QQ"    ],
        "W"        : [    "OP_W"        , "SZ_DQ"    ],
        "Wdq"      : [    "OP_W"        , "SZ_DQ"    ],
        "Wqq"      : [    "OP_W"        , "SZ_QQ"    ],
        "Wsd"      : [    "OP_W"        , "SZ_Q"     ],
        "Wx"       : [    "OP_W"        , "SZ_X"     ],
        "L"        : [    "OP_L"        , "SZ_O"     ],
        "Lx"       : [    "OP_L"        , "SZ_X"     ],
        "MwU"      : [    "OP_MU"       , "SZ_WO"    ],
        "MdU"      : [    "OP_MU"       , "SZ_DO"    ],
        "MqU"      : [    "OP_MU"       , "SZ_QO"    ],
        "N"        : [    "OP_N"        , "SZ_Q"     ],
        "P"        : [    "OP_P"        , "SZ_Q"     ],
        "Q"        : [    "OP_Q"        , "SZ_Q"     ],
        "AL"       : [    "OP_AL"       , "SZ_B"     ],
        "AX"       : [    "OP_AX"       , "SZ_W"     ],
        "eAX"      : [    "OP_eAX"      , "SZ_Z"     ],
        "rAX"      : [    "OP_rAX"      , "SZ_V"     ],
        "CL"       : [    "OP_CL"       , "SZ_B"     ],
        "CX"       : [    "OP_CX"       , "SZ_W"     ],
        "eCX"      : [    "OP_eCX"      , "SZ_Z"     ],
        "rCX"      : [    "OP_rCX"      , "SZ_V"     ],
        "DL"       : [    "OP_DL"       , "SZ_B"     ],
        "DX"       : [    "OP_DX"       , "SZ_W"     ],
        "eDX"      : [    "OP_eDX"      , "SZ_Z"     ],
        "rDX"      : [    "OP_rDX"      , "SZ_V"     ],
        "R0b"      : [    "OP_R0"       , "SZ_B"     ],
        "R1b"      : [    "OP_R1"       , "SZ_B"     ],
        "R2b"      : [    "OP_R2"       , "SZ_B"     ],
        "R3b"      : [    "OP_R3"       , "SZ_B"     ],
        "R4b"      : [    "OP_R4"       , "SZ_B"     ],
        "R5b"      : [    "OP_R5"       , "SZ_B"     ],
        "R6b"      : [    "OP_R6"       , "SZ_B"     ],
        "R7b"      : [    "OP_R7"       , "SZ_B"     ],
        "R0w"      : [    "OP_R0"       , "SZ_W"     ],
        "R1w"      : [    "OP_R1"       , "SZ_W"     ],
        "R2w"      : [    "OP_R2"       , "SZ_W"     ],
        "R3w"      : [    "OP_R3"       , "SZ_W"     ],
        "R4w"      : [    "OP_R4"       , "SZ_W"     ],
        "R5w"      : [    "OP_R5"       , "SZ_W"     ],
        "R6w"      : [    "OP_R6"       , "SZ_W"     ],
        "R7w"      : [    "OP_R7"       , "SZ_W"     ],
        "R0v"      : [    "OP_R0"       , "SZ_V"     ],
        "R1v"      : [    "OP_R1"       , "SZ_V"     ],
        "R2v"      : [    "OP_R2"       , "SZ_V"     ],
        "R3v"      : [    "OP_R3"       , "SZ_V"     ],
        "R4v"      : [    "OP_R4"       , "SZ_V"     ],
        "R5v"      : [    "OP_R5"       , "SZ_V"     ],
        "R6v"      : [    "OP_R6"       , "SZ_V"     ],
        "R7v"      : [    "OP_R7"       , "SZ_V"     ],
        "R0z"      : [    "OP_R0"       , "SZ_Z"     ],
        "R1z"      : [    "OP_R1"       , "SZ_Z"     ],
        "R2z"      : [    "OP_R2"       , "SZ_Z"     ],
        "R3z"      : [    "OP_R3"       , "SZ_Z"     ],
        "R4z"      : [    "OP_R4"       , "SZ_Z"     ],
        "R5z"      : [    "OP_R5"       , "SZ_Z"     ],
        "R6z"      : [    "OP_R6"       , "SZ_Z"     ],
        "R7z"      : [    "OP_R7"       , "SZ_Z"     ],
        "R0y"      : [    "OP_R0"       , "SZ_Y"     ],
        "R1y"      : [    "OP_R1"       , "SZ_Y"     ],
        "R2y"      : [    "OP_R2"       , "SZ_Y"     ],
        "R3y"      : [    "OP_R3"       , "SZ_Y"     ],
        "R4y"      : [    "OP_R4"       , "SZ_Y"     ],
        "R5y"      : [    "OP_R5"       , "SZ_Y"     ],
        "R6y"      : [    "OP_R6"       , "SZ_Y"     ],
        "R7y"      : [    "OP_R7"       , "SZ_Y"     ],
        "ES"       : [    "OP_ES"       , "SZ_NA"    ],
        "CS"       : [    "OP_CS"       , "SZ_NA"    ],
        "DS"       : [    "OP_DS"       , "SZ_NA"    ],
        "SS"       : [    "OP_SS"       , "SZ_NA"    ],
        "GS"       : [    "OP_GS"       , "SZ_NA"    ],
        "FS"       : [    "OP_FS"       , "SZ_NA"    ],
        "ST0"      : [    "OP_ST0"      , "SZ_NA"    ],
        "ST1"      : [    "OP_ST1"      , "SZ_NA"    ],
        "ST2"      : [    "OP_ST2"      , "SZ_NA"    ],
        "ST3"      : [    "OP_ST3"      , "SZ_NA"    ],
        "ST4"      : [    "OP_ST4"      , "SZ_NA"    ],
        "ST5"      : [    "OP_ST5"      , "SZ_NA"    ],
        "ST6"      : [    "OP_ST6"      , "SZ_NA"    ],
        "ST7"      : [    "OP_ST7"      , "SZ_NA"    ],
        "NONE"     : [    "OP_NONE"     , "SZ_NA"    ],
    }

    #
    # opcode prefix dictionary
    #
    # Maps a prefix name used in instruction definitions to the C flag
    # identifier written into the instruction table; the flags of one
    # instruction are joined with "|" in genInsnTable().
    #
    PrefixDict = {
        "rep"      : "P_str",
        "repz"     : "P_strz",
        "aso"      : "P_aso",
        "oso"      : "P_oso",
        "rexw"     : "P_rexw",
        "rexb"     : "P_rexb",
        "rexx"     : "P_rexx",
        "rexr"     : "P_rexr",
        "vexl"     : "P_vexl",
        "vexw"     : "P_vexw",
        "seg"      : "P_seg",
        "inv64"    : "P_inv64",
        "def64"    : "P_def64",
        "cast"     : "P_cast",
    }

    # Extra mnemonics appended after the ones reported by the tables
    # (see getMnemonicsList()).
    MnemonicAliases = ( "invalid", "3dnow", "none", "db", "pause" )

    def __init__(self, tables):
        """Index the instructions and opcode tables of `tables`.

        Each instruction and each table is assigned its position in
        tables.getInsnList() / tables.getTableList(); those positions are
        the indices written into the generated C tables.
        """
        self.tables = tables
        self._insnIndexMap = {}
        for idx, insn in enumerate(tables.getInsnList()):
            self._insnIndexMap[insn] = idx

        self._tableIndexMap = {}
        for idx, table in enumerate(tables.getTableList()):
            self._tableIndexMap[table] = idx

    def getInsnIndex(self, insn):
        """Return the index assigned to instruction definition `insn`."""
        assert isinstance(insn, UdInsnDef)
        return self._insnIndexMap[insn]

    def getTableIndex(self, table):
        """Return the index assigned to opcode table `table`."""
        assert isinstance(table, UdOpcodeTable)
        return self._tableIndexMap[table]

    def getTableName(self, table):
        """Return the C identifier used for `table` ("ud_itab__<index>")."""
        return "ud_itab__%d" % self.getTableIndex(table)

    def genOpcodeTable(self, table, isGlobal=False):
        """Emit Opcode Table in C.

        Writes `table` to self.ItabC as a "const uint16_t" array, four
        entries per line.  The array is declared "static" unless
        `isGlobal` is true.  Empty slots become INVALID, nested tables
        become GROUP(<table index>) and instructions become their
        instruction index.
        """
        out = self.ItabC
        out.write("\n")
        if not isGlobal:
            out.write('static ')
        out.write("const uint16_t %s[] = {\n" % self.getTableName(table))
        for i in range(table.size()):
            # Start a new row (prefixed with the hex offset) every 4 entries.
            if i % 4 == 0:
                if i > 0:
                    out.write("\n")
                out.write("  /* %2x */" % i)
            e = table.entryAt(i)
            if e is None:
                out.write("%12s," % "INVALID")
            elif isinstance(e, UdOpcodeTable):
                out.write("%12s," % ("GROUP(%d)" % self.getTableIndex(e)))
            elif isinstance(e, UdInsnDef):
                out.write("%12s," % self.getInsnIndex(e))
        out.write("\n")
        out.write("};\n")

    def genOpcodeTables(self):
        """Emit every opcode table; only the root table is non-static."""
        for table in self.tables.getTableList():
            self.genOpcodeTable(table, table is self.tables.root)

    def genOpcodeTablesLookupIndex(self):
        """Emit the ud_lookup_table_list array: one entry per opcode table
        holding the table's C name, its label and its quoted meta string.
        """
        out = self.ItabC
        out.write("\n\n")
        out.write("struct ud_lookup_table_list_entry ud_lookup_table_list[] = {\n")
        for table in self.tables.getTableList():
            f0 = self.getTableName(table) + ","
            f1 = table.label() + ","
            f2 = "\"%s\"" % table.meta()
            out.write("    /* %03d */ { %s %s %s },\n" %
                      (self.getTableIndex(table), f0, f1, f2))
        out.write("};")

    def genInsnTable( self ):
        """Emit the ud_itab array: one entry per instruction with its
        mnemonic constant, four operand macros and OR-ed prefix flags.

        An unknown operand name is reported on stdout and still emitted
        as "O_<name>".  An unknown prefix name is reported on stdout and
        then raises KeyError from the PrefixDict lookup.
        """
        out = self.ItabC
        out.write("struct ud_itab_entry ud_itab[] = {\n")
        for insn in self.tables.getInsnList():
            # Unused operand slots stay O_NONE.
            opr_c = ["O_NONE", "O_NONE", "O_NONE", "O_NONE"]
            for i, name in enumerate(insn.operands):
                if name not in self.OperandDict:
                    print("error: invalid operand declaration: %s\n" % name)
                opr_c[i] = "O_" + name
            opr = "%s, %s, %s, %s" % tuple(opr_c)

            pfx_c = []
            for p in insn.prefixes:
                if p not in self.PrefixDict:
                    # Report the offending prefix itself; the original code
                    # formatted an as-yet-unassigned variable here.
                    print("error: invalid prefix specification: %s \n" % p)
                pfx_c.append(self.PrefixDict[p])
            if len(insn.prefixes) == 0:
                pfx_c.append("P_none")
            pfx = "|".join(pfx_c)

            out.write("  /* %04d */ { UD_I%s, %s, %s },\n"
                      % (self.getInsnIndex(insn), insn.mnemonic, opr, pfx))
        out.write("};\n")

    def getMnemonicsList(self):
        """Return the tables' mnemonics followed by MnemonicAliases.

        A new list is built on every call so that the list handed out by
        the tables object is never modified and repeated calls do not
        accumulate duplicate aliases.
        """
        mnemonics = list(self.tables.getMnemonicsList())
        mnemonics.extend(self.MnemonicAliases)
        return mnemonics

    def genMnemonicsList(self):
        """Emit the ud_mnemonics_str array of quoted mnemonic strings."""
        quoted = ["\"%s\"" % m for m in self.getMnemonicsList()]
        out = self.ItabC
        out.write("\n\n")
        out.write("const char* ud_mnemonics_str[] = {\n    ")
        out.write(",\n    ".join(quoted))
        out.write("\n};\n")

    def genItabH( self, filePath ):
        """Write the header file at `filePath`.

        The header contains the ud_table_type enumeration (from
        UdOpcodeTable.getLabels()), the ud_mnemonic_code enumeration and
        the extern declaration of ud_mnemonics_str.  The file is closed
        even if generation fails part-way.
        """
        with open(filePath, "w") as out:
            self.ItabH = out

            # Generate Table Type Enumeration
            out.write("#ifndef UD_ITAB_H\n")
            out.write("#define UD_ITAB_H\n\n")

            out.write("/* itab.h -- generated by udis86:scripts/ud_itab.py, do no edit */\n\n")

            # table type enumeration
            out.write("/* ud_table_type -- lookup table types (see decode.c) */\n")
            out.write("enum ud_table_type {\n    ")
            out.write(",\n    ".join(UdOpcodeTable.getLabels()))
            out.write("\n};\n\n")

            # mnemonic enumeration
            out.write("/* ud_mnemonic -- mnemonic constants */\n")
            enum  = "enum ud_mnemonic_code {\n    "
            enum += ",\n    ".join(["UD_I%s" % m for m in self.getMnemonicsList()])
            enum += ",\n    UD_MAX_MNEMONIC_CODE"
            enum += "\n};\n"
            out.write(enum)
            out.write("\n")

            out.write("extern const char * ud_mnemonics_str[];\n")

            out.write("\n#endif /* UD_ITAB_H */\n")

    def genItabC(self, filePath):
        """Write the C source file at `filePath`.

        Emits, in order: the file banner and include, the GROUP/INVALID
        macros, all opcode tables, the table lookup index, the O_<name>
        operand macros (sorted by name), the instruction table and the
        mnemonic strings.  The file is closed even if generation fails
        part-way.
        """
        with open(filePath, "w") as out:
            # The gen* helpers write through self.ItabC.
            self.ItabC = out
            out.write("/* itab.c -- generated by udis86:scripts/ud_itab.py, do no edit")
            out.write(" */\n")
            out.write("#include \"udis86_decode.h\"\n\n")

            out.write("#define GROUP(n) (0x8000 | (n))\n")
            out.write("#define INVALID  %d\n\n" % self.getInsnIndex(self.tables.invalidInsn))

            self.genOpcodeTables()
            self.genOpcodeTablesLookupIndex()

            #
            # Macros defining short-names for operands
            #
            out.write("\n\n/* itab entry operand definitions (for readability) */\n")
            for o in sorted(self.OperandDict):
                opType, opSize = self.OperandDict[o]
                out.write("#define O_%-7s { %-12s %-8s }\n" %
                          (o, opType + ",", opSize))
            out.write("\n")

            self.genInsnTable()
            self.genMnemonicsList()

    def genItab( self, location ):
        """Generate udis86_itab.c and udis86_itab.h in directory `location`."""
        self.genItabC(os.path.join(location, "udis86_itab.c"))
        self.genItabH(os.path.join(location, "udis86_itab.h"))

def usage():
    """Print the command-line synopsis of this script to standard output."""
    synopsis = "usage: ud_itab.py <optable.xml> <output-path>"
    print(synopsis)

def main():
    """Script entry point.

    Expects exactly two command-line arguments: the opcode table XML file
    and the output directory.  With any other argument count the usage
    line is printed and the process exits with status 1.  Otherwise the
    opcode tables are loaded from the XML file and the itab files are
    generated into the output directory.
    """
    args = sys.argv[1:]
    if len(args) != 2:
        usage()
        sys.exit(1)

    xml_path, output_dir = args
    generator = UdItabGenerator(UdOpcodeTables(xml=xml_path))
    generator.genItab(output_dir)

# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()