diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/JavaScriptCore/disassembler/udis86/ud_opcode.py | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/JavaScriptCore/disassembler/udis86/ud_opcode.py')
-rw-r--r-- | Source/JavaScriptCore/disassembler/udis86/ud_opcode.py | 622 |
1 files changed, 622 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/disassembler/udis86/ud_opcode.py b/Source/JavaScriptCore/disassembler/udis86/ud_opcode.py new file mode 100644 index 000000000..fe1833dc7 --- /dev/null +++ b/Source/JavaScriptCore/disassembler/udis86/ud_opcode.py @@ -0,0 +1,622 @@ +# udis86 - scripts/ud_opcode.py +# +# Copyright (c) 2009, 2013 Vivek Thampi +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os + +# Some compatibility stuff for supporting python 2.x as well as python 3.x +def itemslist(dict): + try: + return dict.iteritems() # python 2.x + except AttributeError: + return list(dict.items()) # python 3.x + +class UdInsnDef: + """An x86 instruction definition + """ + def __init__(self, **insnDef): + self.mnemonic = insnDef['mnemonic'] + self.prefixes = insnDef['prefixes'] + self.opcodes = insnDef['opcodes'] + self.operands = insnDef['operands'] + self._cpuid = insnDef['cpuid'] + self._opcexts = {} + + for opc in self.opcodes: + if opc.startswith('/'): + e, v = opc.split('=') + self._opcexts[e] = v + + def lookupPrefix(self, pfx): + """Lookup prefix (if any, None otherwise), by name""" + return True if pfx in self.prefixes else None + + + @property + def vendor(self): + return self._opcexts.get('/vendor', None) + + @property + def mode(self): + return self._opcexts.get('/m', None) + + @property + def osize(self): + return self._opcexts.get('/o', None) + + def isDef64(self): + return 'def64' in self.prefixes + + def __str__(self): + return self.mnemonic + " " + ', '.join(self.operands) + \ + " " + ' '.join(self.opcodes) + + +class UdOpcodeTable: + """A single table of instruction definitions, indexed by + a decode field. + """ + + class CollisionError(Exception): + pass + + class IndexError(Exception): + """Invalid Index Error""" + pass + + @classmethod + def vendor2idx(cls, v): + return (0 if v == 'amd' + else (1 if v == 'intel' + else 2)) + + @classmethod + def vex2idx(cls, v): + if v.startswith("none_"): + v = v[5:] + vexOpcExtMap = { + 'none' : 0x0, + '0f' : 0x1, + '0f38' : 0x2, + '0f3a' : 0x3, + '66' : 0x4, + '66_0f' : 0x5, + '66_0f38' : 0x6, + '66_0f3a' : 0x7, + 'f3' : 0x8, + 'f3_0f' : 0x9, + 'f3_0f38' : 0xa, + 'f3_0f3a' : 0xb, + 'f2' : 0xc, + 'f2_0f' : 0xd, + 'f2_0f38' : 0xe, + 'f2_0f3a' : 0xf, + } + return vexOpcExtMap[v] + + + # A mapping of opcode extensions to their representational + # values used in the opcode map. + OpcExtMap = { + '/rm' : lambda v: int(v, 16), + '/x87' : lambda v: int(v, 16), + '/3dnow' : lambda v: int(v, 16), + '/reg' : lambda v: int(v, 16), + # modrm.mod + # (!11, 11) => (00b, 01b) + '/mod' : lambda v: 0 if v == '!11' else 1, + # Mode extensions: + # (16, 32, 64) => (00, 01, 02) + '/o' : lambda v: (int(v) / 32), + '/a' : lambda v: (int(v) / 32), + # Disassembly mode + # (!64, 64) => (00b, 01b) + '/m' : lambda v: 1 if v == '64' else 0, + # SSE + # none => 0 + # f2 => 1 + # f3 => 2 + # 66 => 3 + '/sse' : lambda v: (0 if v == 'none' + else (((int(v, 16) & 0xf) + 1) / 2)), + # AVX + '/vex' : lambda v: UdOpcodeTable.vex2idx(v), + '/vexw' : lambda v: 0 if v == '0' else 1, + '/vexl' : lambda v: 0 if v == '0' else 1, + # Vendor + '/vendor': lambda v: UdOpcodeTable.vendor2idx(v) + } + + + _TableInfo = { + 'opctbl' : { 'label' : 'UD_TAB__OPC_TABLE', 'size' : 256 }, + '/sse' : { 'label' : 'UD_TAB__OPC_SSE', 'size' : 4 }, + '/reg' : { 'label' : 'UD_TAB__OPC_REG', 'size' : 8 }, + '/rm' : { 'label' : 'UD_TAB__OPC_RM', 'size' : 8 }, + '/mod' : { 'label' : 'UD_TAB__OPC_MOD', 'size' : 2 }, + '/m' : { 'label' : 'UD_TAB__OPC_MODE', 'size' : 2 }, + '/x87' : { 'label' : 'UD_TAB__OPC_X87', 'size' : 64 }, + '/a' : { 'label' : 'UD_TAB__OPC_ASIZE', 'size' : 3 }, + '/o' : { 'label' : 'UD_TAB__OPC_OSIZE', 'size' : 3 }, + '/3dnow' : { 'label' : 'UD_TAB__OPC_3DNOW', 'size' : 256 }, + '/vendor' : { 'label' : 'UD_TAB__OPC_VENDOR', 'size' : 3 }, + '/vex' : { 'label' : 'UD_TAB__OPC_VEX', 'size' : 16 }, + '/vexw' : { 'label' : 'UD_TAB__OPC_VEX_W', 'size' : 2 }, + '/vexl' : { 'label' : 'UD_TAB__OPC_VEX_L', 'size' : 2 }, + } + + + def __init__(self, typ): + assert typ in self._TableInfo + self._typ = typ + self._entries = {} + + + def size(self): + return self._TableInfo[self._typ]['size'] + + def entries(self): + return itemslist(self._entries) + + def numEntries(self): + return len(self._entries.keys()) + + def label(self): + return self._TableInfo[self._typ]['label'] + + def typ(self): + return self._typ + + def meta(self): + return self._typ + + + def __str__(self): + return "table-%s" % self._typ + + + def add(self, opc, obj): + typ = UdOpcodeTable.getOpcodeTyp(opc) + idx = UdOpcodeTable.getOpcodeIdx(opc) + if self._typ != typ or idx in self._entries: + raise CollisionError() + self._entries[idx] = obj + + + def lookup(self, opc): + typ = UdOpcodeTable.getOpcodeTyp(opc) + idx = UdOpcodeTable.getOpcodeIdx(opc) + if self._typ != typ: + raise UdOpcodeTable.CollisionError("%s <-> %s" % (self._typ, typ)) + return self._entries.get(idx, None) + + + def entryAt(self, index): + """Returns the entry at a given index of the table, + None if there is none. Raises an exception if the + index is out of bounds. + """ + if index < self.size(): + return self._entries.get(index, None) + raise self.IndexError("index out of bounds: %s" % index) + + def setEntryAt(self, index, obj): + if index < self.size(): + self._entries[index] = obj + else: + raise self.IndexError("index out of bounds: %s" % index) + + @classmethod + def getOpcodeTyp(cls, opc): + if opc.startswith('/'): + return opc.split('=')[0] + else: + return 'opctbl' + + + @classmethod + def getOpcodeIdx(cls, opc): + if opc.startswith('/'): + typ, v = opc.split('=') + return cls.OpcExtMap[typ](v) + else: + # plain opctbl opcode + return int(opc, 16) + + + @classmethod + def getLabels(cls): + """Returns a list of all labels""" + return [cls._TableInfo[k]['label'] for k in cls._TableInfo.keys()] + + +class UdOpcodeTables(object): + """Collection of opcode tables + """ + + class CollisionError(Exception): + def __init__(self, obj1, obj2): + self.obj1, self.obj2 = obj1, obj2 + + def newTable(self, typ): + """Create a new opcode table of a give type `typ`. """ + tbl = UdOpcodeTable(typ) + self._tables.append(tbl) + return tbl + + def mkTrie(self, opcodes, obj): + """Recursively contruct a trie entry mapping a string of + opcodes to an object. + """ + if len(opcodes) == 0: + return obj + opc = opcodes[0] + tbl = self.newTable(UdOpcodeTable.getOpcodeTyp(opc)) + tbl.add(opc, self.mkTrie(opcodes[1:], obj)) + return tbl + + def walk(self, tbl, opcodes): + """Walk down the opcode trie, starting at a given opcode + table, given a string of opcodes. Return None if unable + to walk, the object at the leaf otherwise. + """ + opc = opcodes[0] + e = tbl.lookup(opc) + if e is None: + return None + elif isinstance(e, UdOpcodeTable) and len(opcodes[1:]): + return self.walk(e, opcodes[1:]) + return e + + def map(self, tbl, opcodes, obj): + """Create a mapping from a given string of opcodes to an + object in the opcode trie. Constructs trie branches as + needed. + """ + opc = opcodes[0] + e = tbl.lookup(opc) + if e is None: + tbl.add(opc, self.mkTrie(opcodes[1:], obj)) + else: + if len(opcodes[1:]) == 0: + raise self.CollisionError(e, obj) + self.map(e, opcodes[1:], obj) + + def __init__(self, xml): + self._tables = [] + self._insns = [] + self._mnemonics = {} + + # The root table is always a 256 entry opctbl, indexed + # by a plain opcode byte + self.root = self.newTable('opctbl') + + if os.getenv("UD_OPCODE_DEBUG"): + self._logFh = open("opcodeTables.log", "w") + + # add an invalid instruction entry without any mapping + # in the opcode tables. + self.invalidInsn = UdInsnDef(mnemonic="invalid", opcodes=[], cpuid=[], + operands=[], prefixes=[]) + self._insns.append(self.invalidInsn) + + # Construct UdOpcodeTables object from the given + # udis86 optable.xml + for insn in self.__class__.parseOptableXML(xml): + self.addInsnDef(insn) + self.patchAvx2byte() + self.mergeSSENONE() + self.printStats() + + def log(self, s): + if os.getenv("UD_OPCODE_DEBUG"): + self._logFh.write(s + "\n") + + + def mergeSSENONE(self): + """Merge sse tables with only one entry for /sse=none + """ + for table in self._tables: + for k, e in table.entries(): + if isinstance(e, UdOpcodeTable) and e.typ() == '/sse': + if e.numEntries() == 1: + sse = e.lookup("/sse=none") + if sse: + table.setEntryAt(k, sse) + uniqTables = {} + def genTableList(tbl): + if tbl not in uniqTables: + self._tables.append(tbl) + uniqTables[tbl] = 1 + for k, e in tbl.entries(): + if isinstance(e, UdOpcodeTable): + genTableList(e) + self._tables = [] + genTableList(self.root) + + + def patchAvx2byte(self): + # create avx tables + for pp in (None, 'f2', 'f3', '66'): + for m in (None, '0f', '0f38', '0f3a'): + if pp is None and m is None: + continue + if pp is None: + vex = m + elif m is None: + vex = pp + else: + vex = pp + '_' + m + table = self.walk(self.root, ('c4', '/vex=' + vex)) + self.map(self.root, ('c5', '/vex=' + vex), table) + + + def addInsn(self, **insnDef): + + # Canonicalize opcode list + opcexts = insnDef['opcexts'] + opcodes = list(insnDef['opcodes']) + + # Re-order vex + if '/vex' in opcexts: + assert opcodes[0] == 'c4' or opcodes[0] == 'c5' + opcodes.insert(1, '/vex=' + opcexts['/vex']) + + # Add extensions. The order is important, and determines how + # well the opcode table is packed. Also note, /sse must be + # before /o, because /sse may consume operand size prefix + # affect the outcome of /o. + for ext in ('/mod', '/x87', '/reg', '/rm', '/sse', '/o', '/a', '/m', + '/vexw', '/vexl', '/3dnow', '/vendor'): + if ext in opcexts: + opcodes.append(ext + '=' + opcexts[ext]) + + insn = UdInsnDef(mnemonic = insnDef['mnemonic'], + prefixes = insnDef['prefixes'], + operands = insnDef['operands'], + opcodes = opcodes, + cpuid = insnDef['cpuid']) + try: + self.map(self.root, opcodes, insn) + except self.CollisionError as e: + self.pprint() + print(opcodes, insn, str(e.obj1), str(e.obj2)) + raise + except Exception as e: + self.pprint() + raise + self._insns.append(insn) + # add to lookup by mnemonic structure + if insn.mnemonic not in self._mnemonics: + self._mnemonics[insn.mnemonic] = [ insn ] + else: + self._mnemonics[insn.mnemonic].append(insn) + + + def addInsnDef(self, insnDef): + opcodes = [] + opcexts = {} + + # pack plain opcodes first, and collect opcode + # extensions + for opc in insnDef['opcodes']: + if not opc.startswith('/'): + opcodes.append(opc) + else: + e, v = opc.split('=') + opcexts[e] = v + + # treat vendor as an opcode extension + if len(insnDef['vendor']): + opcexts['/vendor'] = insnDef['vendor'][0] + + if insnDef['mnemonic'] in ('lds', 'les'): + # + # Massage lds and les, which share the same prefix as AVX + # instructions, to work well with the opcode tree. + # + opcexts['/vex'] = 'none' + elif '/vex' in opcexts: + # A proper avx instruction definition; make sure there are + # no legacy opcode extensions + assert '/sse' not in opcodes + + # make sure the opcode definitions don't already include + # the avx prefixes. + assert opcodes[0] not in ('c4', 'c5') + + # An avx only instruction is defined by the /vex= opcode + # extension. They do not include the c4 (long form) or + # c5 (short form) prefix. As part of opcode table generate, + # here we create the long form definition, and then patch + # the table for c5 in a later stage. + # Construct a long-form definition of the avx instruction + opcodes.insert(0, 'c4') + elif (opcodes[0] == '0f' and opcodes[1] != '0f' and + '/sse' not in opcexts): + # Make all 2-byte opcode form isntructions play nice with sse + # opcode maps. + opcexts['/sse'] = 'none' + + # legacy sse defs that get promoted to avx + fn = self.addInsn + if 'avx' in insnDef['cpuid'] and '/sse' in opcexts: + fn = self.addSSE2AVXInsn + + fn(mnemonic = insnDef['mnemonic'], + prefixes = insnDef['prefixes'], + opcodes = opcodes, + opcexts = opcexts, + operands = insnDef['operands'], + cpuid = insnDef['cpuid']) + + + def addSSE2AVXInsn(self, **insnDef): + """Add an instruction definition containing an avx cpuid bit, but + declared in its legacy SSE form. The function splits the + definition to create two new definitions, one for SSE and one + promoted to an AVX form. + """ + + # SSE + ssemnemonic = insnDef['mnemonic'] + sseopcodes = insnDef['opcodes'] + # remove vex opcode extensions + sseopcexts = dict([(e, v) for e, v in itemslist(insnDef['opcexts']) + if not e.startswith('/vex')]) + # strip out avx operands, preserving relative ordering + # of remaining operands + sseoperands = [opr for opr in insnDef['operands'] + if opr not in ('H', 'L')] + # strip out avx prefixes + sseprefixes = [pfx for pfx in insnDef['prefixes'] + if not pfx.startswith('vex')] + # strip out avx bits from cpuid + ssecpuid = [flag for flag in insnDef['cpuid'] + if not flag.startswith('avx')] + + self.addInsn(mnemonic = ssemnemonic, + prefixes = sseprefixes, + opcodes = sseopcodes, + opcexts = sseopcexts, + operands = sseoperands, + cpuid = ssecpuid) + + # AVX + vexmnemonic = 'v' + insnDef['mnemonic'] + vexprefixes = insnDef['prefixes'] + vexopcodes = ['c4'] + vexopcexts = dict([(e, insnDef['opcexts'][e]) + for e in insnDef['opcexts'] if e != '/sse']) + vexopcexts['/vex'] = insnDef['opcexts']['/sse'] + '_' + '0f' + if insnDef['opcodes'][1] == '38' or insnDef['opcodes'][1] == '3a': + vexopcexts['/vex'] += insnDef['opcodes'][1] + vexopcodes.extend(insnDef['opcodes'][2:]) + else: + vexopcodes.extend(insnDef['opcodes'][1:]) + vexoperands = [] + for o in insnDef['operands']: + # make the operand size explicit: x + if o in ('V', 'W', 'H', 'U'): + o = o + 'x' + vexoperands.append(o) + vexcpuid = [flag for flag in insnDef['cpuid'] + if not flag.startswith('sse')] + + self.addInsn(mnemonic = vexmnemonic, + prefixes = vexprefixes, + opcodes = vexopcodes, + opcexts = vexopcexts, + operands = vexoperands, + cpuid = vexcpuid) + + def getInsnList(self): + """Returns a list of all instructions in the collection""" + return self._insns + + + def getTableList(self): + """Returns a list of all tables in the collection""" + return self._tables + + def getMnemonicsList(self): + """Returns a sorted list of mnemonics""" + return sorted(self._mnemonics.keys()) + + + def pprint(self): + def printWalk(tbl, indent=""): + entries = tbl.entries() + for k, e in entries: + if isinstance(e, UdOpcodeTable): + self.log("%s |-<%02x> %s" % (indent, k, e)) + printWalk(e, indent + " |") + elif isinstance(e, UdInsnDef): + self.log("%s |-<%02x> %s" % (indent, k, e)) + printWalk(self.root) + + + def printStats(self): + tables = self.getTableList() + self.log("stats: ") + self.log(" Num tables = %d" % len(tables)) + self.log(" Num insnDefs = %d" % len(self.getInsnList())) + self.log(" Num insns = %d" % len(self.getMnemonicsList())) + + totalSize = 0 + totalEntries = 0 + for table in tables: + totalSize += table.size() + totalEntries += table.numEntries() + self.log(" Packing Ratio = %d%%" % ((totalEntries * 100) / totalSize)) + self.log("--------------------") + + self.pprint() + + + @staticmethod + def parseOptableXML(xml): + """Parse udis86 optable.xml file and return list of + instruction definitions. + """ + from xml.dom import minidom + + xmlDoc = minidom.parse(xml) + tlNode = xmlDoc.firstChild + insns = [] + + while tlNode and tlNode.localName != "x86optable": + tlNode = tlNode.nextSibling + + for insnNode in tlNode.childNodes: + if not insnNode.localName: + continue + if insnNode.localName != "instruction": + raise Exception("warning: invalid insn node - %s" % insnNode.localName) + mnemonic = insnNode.getElementsByTagName('mnemonic')[0].firstChild.data + vendor, cpuid = '', [] + + for node in insnNode.childNodes: + if node.localName == 'vendor': + vendor = node.firstChild.data.split() + elif node.localName == 'cpuid': + cpuid = node.firstChild.data.split() + + for node in insnNode.childNodes: + if node.localName == 'def': + insnDef = { 'pfx' : [] } + for node in node.childNodes: + if not node.localName: + continue + if node.localName in ('pfx', 'opc', 'opr', 'vendor', 'cpuid'): + insnDef[node.localName] = node.firstChild.data.split() + elif node.localName == 'mode': + insnDef['pfx'].extend(node.firstChild.data.split()) + insns.append({'prefixes' : insnDef.get('pfx', []), + 'mnemonic' : mnemonic, + 'opcodes' : insnDef.get('opc', []), + 'operands' : insnDef.get('opr', []), + 'vendor' : insnDef.get('vendor', vendor), + 'cpuid' : insnDef.get('cpuid', cpuid)}) + return insns |