From c99577a8d4baadfe8fd30be2fd9ffa3d31c80682 Mon Sep 17 00:00:00 2001 From: abc4857 Date: Tue, 5 Sep 2017 23:02:04 -0400 Subject: [PATCH] Add option to translate debug info --- README.md | 4 +- enjarify/byteio.py | 1 + enjarify/debug.py | 66 ++++++++ enjarify/jvm/optimization/jumps.py | 15 +- enjarify/jvm/optimization/options.py | 3 +- enjarify/jvm/optimization/registers.py | 6 +- enjarify/jvm/writebytecode.py | 12 +- enjarify/jvm/writeclass.py | 34 +++- enjarify/jvm/writedebug.py | 210 +++++++++++++++++++++++++ enjarify/jvm/writeir.py | 20 ++- enjarify/main.py | 15 +- enjarify/parsedex.py | 24 ++- 12 files changed, 381 insertions(+), 29 deletions(-) create mode 100644 enjarify/debug.py create mode 100644 enjarify/jvm/writedebug.py diff --git a/README.md b/README.md index 101bf74..1419518 100644 --- a/README.md +++ b/README.md @@ -40,13 +40,11 @@ By default, Enjarify will refuse to overwrite the output file if it already exis ### Why not dex2jar? -Dex2jar is an older tool that also tries to translate Dalvik to Java bytecode. It works reasonable well most of the time, but a lot of obscure features or edge cases will cause it to fail or even silently produce incorrect results. By contrast, Enjarify is designed to work in as many cases as possible, even for code where Dex2jar would fail. Among other things, Enjarify correctly handles unicode class names, constants used as multiple types, implicit casts, exception handlers jumping into normal control flow, classes that reference too many constants, very long methods, exception handlers after a catchall handler, and static initial values of the wrong type. +Dex2jar is an older tool that also tries to translate Dalvik to Java bytecode. It works reasonable well most of the time, but a lot of obscure features or edge cases will cause it to fail or even silently produce incorrect results. By contrast, Enjarify is designed to work in as many cases as possible, even for code where Dex2jar would fail. 
Among other things, Enjarify correctly handles unicode class names, constants used as multiple types, implicit casts, exception handlers jumping into normal control flow, classes that reference too many constants, very long methods, exception handlers after a catchall handler, and static initial values of the wrong type. When run with the `--debug` option, Enjarify can also translate debug info: line numbers, local variable names and types, and method parameter names. ### Limitations -Enjarify does not currently translate optional metadata such as sourcefile attributes, line numbers, and annotations. - Enjarify tries hard to successfully translate as many classes as possible, but there are some potential cases where it is simply not possible due to limitations in Android, Java, or both. Luckily, this only happens in contrived circumstances, so it shouldn't be a problem in practice. diff --git a/enjarify/byteio.py b/enjarify/byteio.py index b6065a6..42032a2 100644 --- a/enjarify/byteio.py +++ b/enjarify/byteio.py @@ -54,6 +54,7 @@ def _leb128(self, signed=False): def uleb128(self): return self._leb128() def sleb128(self): return self._leb128(signed=True) + def uleb128p1(self): return self._leb128() - 1 # Maintain strings in binary encoding instead of attempting to decode them # since the output will be using the same encoding anyway diff --git a/enjarify/debug.py b/enjarify/debug.py new file mode 100644 index 0000000..f993399 --- /dev/null +++ b/enjarify/debug.py @@ -0,0 +1,66 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +DBG_END_SEQUENCE = 0x00 +DBG_ADVANCE_PC = 0x01 +DBG_ADVANCE_LINE = 0x02 +DBG_START_LOCAL = 0x03 +DBG_START_LOCAL_EXTENDED = 0x04 +DBG_END_LOCAL = 0x05 +DBG_RESTART_LOCAL = 0x06 +DBG_SET_PROLOGUE_END = 0x07 +DBG_SET_EPILOGUE_BEGIN = 0x08 +DBG_SET_FILE = 0x09 +DBG_FIRST_SPECIAL = 0x0A + +class DebugInstruction: + def __init__(self, dex, stream): + self.opcode = stream.u8() + if self.opcode == DBG_ADVANCE_PC: + self.addr_diff = stream.uleb128() + elif self.opcode == DBG_ADVANCE_LINE: + self.line_diff = stream.sleb128() + elif self.opcode == DBG_START_LOCAL: + self.register_num = stream.uleb128() + self.name_idx = stream.uleb128p1() + self.type_idx = stream.uleb128p1() + + self.name = dex.string(self.name_idx) + self.type = dex.type(self.type_idx) + elif self.opcode == DBG_START_LOCAL_EXTENDED: + self.register_num = stream.uleb128() + self.name_idx = stream.uleb128p1() + self.type_idx = stream.uleb128p1() + self.sig_idx = stream.uleb128p1() + + self.name = dex.string(self.name_idx) + self.type = dex.type(self.type_idx) + self.sig = dex.string(self.sig_idx) + elif self.opcode == DBG_END_LOCAL: + self.register_num = stream.uleb128() + elif self.opcode == DBG_RESTART_LOCAL: + self.register_num = stream.uleb128() + elif self.opcode == DBG_SET_FILE: + self.name_idx = stream.uleb128p1() + + self.name = dex.string(self.name_idx) + +def parseDebugInfo(dex, stream): + ops = [] + while 1: + op = DebugInstruction(dex, stream) + ops.append(op) + if op.opcode == DBG_END_SEQUENCE: + break + return ops diff --git a/enjarify/jvm/optimization/jumps.py b/enjarify/jvm/optimization/jumps.py index d204fa9..bea1c68 100644 --- a/enjarify/jvm/optimization/jumps.py +++ b/enjarify/jvm/optimization/jumps.py @@ -55,15 +55,24 @@ def optimizeJumps(irdata): assert ins.min <= ins.max ins.max = ins.min -def createBytecode(irdata, opts): +def createBytecode(irdata): + assert len(irdata.pos_map) 
== len(irdata.flat_instructions) instrs = irdata.flat_instructions posd, end_pos = _calcMinimumPositions(instrs) + + pos_bytecode_map = [] # map each dalvik position -> ir -> to a bytecode offset + last_dalvik_pos = -1 bytecode = bytearray() - for ins in instrs: + for ins, dalvik_pos in zip(instrs, irdata.pos_map): if isinstance(ins, (ir.LazyJumpBase, ir.Switch)): ins.calcBytecode(posd, irdata.labels) bytecode += ins.bytecode + if dalvik_pos != last_dalvik_pos: + pos_bytecode_map += [len(bytecode)] * (dalvik_pos - last_dalvik_pos) + last_dalvik_pos = dalvik_pos + #print(str(len(bytecode))) + assert len(pos_bytecode_map) == max(irdata.pos_map) + 1 assert len(bytecode) == end_pos @@ -97,4 +106,4 @@ def createBytecode(irdata, opts): print('Skipping zero width exception!') assert 0 - return bytes(bytecode), packed_excepts + return bytes(bytecode), packed_excepts, pos_bytecode_map diff --git a/enjarify/jvm/optimization/options.py b/enjarify/jvm/optimization/options.py index 4e9bdf5..d0321c0 100644 --- a/enjarify/jvm/optimization/options.py +++ b/enjarify/jvm/optimization/options.py @@ -15,7 +15,7 @@ class Options: def __init__(self, inline_consts=False, prune_store_loads=False, copy_propagation=False, remove_unused_regs=False, dup2ize=False, - sort_registers=False, split_pool=False, delay_consts=False): + sort_registers=False, split_pool=False, delay_consts=False, translate_debug=False): self.inline_consts = inline_consts self.prune_store_loads = prune_store_loads self.copy_propagation = copy_propagation @@ -24,6 +24,7 @@ def __init__(self, inline_consts=False, prune_store_loads=False, self.sort_registers = sort_registers self.split_pool = split_pool self.delay_consts = delay_consts + self.translate_debug = translate_debug NONE = Options() # Options which make the generated code more readable for humans diff --git a/enjarify/jvm/optimization/registers.py b/enjarify/jvm/optimization/registers.py index c3b0eca..ee2f18d 100644 --- a/enjarify/jvm/optimization/registers.py 
+++ b/enjarify/jvm/optimization/registers.py @@ -160,8 +160,8 @@ def removeUnusedRegisters(irdata): # For simplicity, parameter registers are preserved as is def simpleAllocateRegisters(irdata): instrs = irdata.flat_instructions - regmap = {v:i for i,v in enumerate(irdata.initial_args)} - nextreg = len(irdata.initial_args) + regmap = {v: i for i,v in enumerate(irdata.initial_args) if v != None} + nextreg = len(regmap) for instr in instrs: if isinstance(instr, ir.RegAccess): @@ -172,6 +172,7 @@ def simpleAllocateRegisters(irdata): nextreg += 1 instr.calcBytecode(regmap[instr.key]) irdata.numregs = nextreg + irdata.regmap = {k:v for k,v in regmap.items()} # keep track of original registers for debug info # Sort registers by number of uses so that more frequently used registers will # end up in slots 0-3 or 4-255 and benefit from the shorter instruction forms @@ -229,3 +230,4 @@ def sortAllocateRegisters(irdata): for instr in instrs: if instr.bytecode is None and isinstance(instr, ir.RegAccess): instr.calcBytecode(regmap[instr.key]) + irdata.regmap = regmap diff --git a/enjarify/jvm/writebytecode.py b/enjarify/jvm/writebytecode.py index ca9e1ab..31bf672 100644 --- a/enjarify/jvm/writebytecode.py +++ b/enjarify/jvm/writebytecode.py @@ -14,7 +14,8 @@ from ..byteio import Writer from . 
import writeir, ir -from .optimization import registers, jumps, stack, consts +from .optimization import registers, jumps, stack, consts, options +from .writedebug import writeDebugAttributes def getCodeIR(pool, method, opts): if method.code is not None: @@ -77,7 +78,7 @@ def finishCodeAttrs(pool, code_irs, opts): def writeCodeAttributeTail(pool, irdata, opts): method = irdata.method jumps.optimizeJumps(irdata) - bytecode, excepts = jumps.createBytecode(irdata, opts) + bytecode, excepts, pos_map = jumps.createBytecode(irdata) stream = Writer() # For simplicity, don't bother calculating the actual maximum stack height @@ -95,5 +96,10 @@ def writeCodeAttributeTail(pool, irdata, opts): stream.write(b''.join(excepts)) # attributes - stream.u16(0) + if opts.translate_debug and method.code.debug_info != None: + attr_count, attrs = writeDebugAttributes(pool, irdata, pos_map, len(bytecode), irdata.regmap) + stream.u16(attr_count) + stream.write(attrs) + else: + stream.u16(0) return stream diff --git a/enjarify/jvm/writeclass.py b/enjarify/jvm/writeclass.py index de5499c..cc403f5 100644 --- a/enjarify/jvm/writeclass.py +++ b/enjarify/jvm/writeclass.py @@ -44,19 +44,33 @@ def writeField(pool, stream, field): else: stream.u16(0) # no attributes -def writeMethod(pool, stream, method, code_attr_data): +def writeMethod(pool, stream, method, code_attr_data, opts): stream.u16(method.access & flags.METHOD_FLAGS) stream.u16(pool.utf8(method.id.name)) stream.u16(pool.utf8(method.id.desc)) - + + code_attr = 0 + param_attr = 0 if code_attr_data is not None: + code_attr = 1 + if opts.translate_debug and method.code != None and method.code.debug_info != None and len(method.code.debug_info.parameter_names): + param_attr = 1 + stream.u16(code_attr + param_attr) + if code_attr: code_attr_data = code_attr_data.toBytes() - stream.u16(1) stream.u16(pool.utf8(b"Code")) stream.u32(len(code_attr_data)) stream.write(code_attr_data) - else: - stream.u16(0) # no attributes + if param_attr: + 
stream.u16(pool.utf8(b"MethodParameters")) + stream.u32(1 + len(method.code.debug_info.parameter_names) * 4) + stream.u8(len(method.code.debug_info.parameter_names)) + for name in method.code.debug_info.parameter_names: + if name != None: + stream.u16(pool.utf8(name)) + else: + stream.u16(0) + stream.u16(0) # dex doesn't have access_flags def writeMethods(pool, stream, methods, opts): code_irs = [] @@ -66,7 +80,7 @@ def writeMethods(pool, stream, methods, opts): stream.u16(len(methods)) for method in methods: - writeMethod(pool, stream, method, code_attrs.get(method)) + writeMethod(pool, stream, method, code_attrs.get(method), opts) def classFileAfterPool(cls, opts): stream = Writer() @@ -115,8 +129,12 @@ def toClassFile(cls, opts): try: pool, rest_stream = classFileAfterPool(cls, opts=opts) except error.ClassfileLimitExceeded: - # print('Retrying {} with optimization enabled'.format(cls.name)) - pool, rest_stream = classFileAfterPool(cls, opts=options.ALL) + print('Retrying {} with optimization enabled'.format(cls.name)) + newopts = options.ALL + if opts.translate_debug: + newopts.translate_debug = True + newopts.sort_registers = False # breaks translate debug for some reason + pool, rest_stream = classFileAfterPool(cls, opts=newopts) # write constant pool pool.write(stream) diff --git a/enjarify/jvm/writedebug.py b/enjarify/jvm/writedebug.py new file mode 100644 index 0000000..7b609d7 --- /dev/null +++ b/enjarify/jvm/writedebug.py @@ -0,0 +1,210 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from ..byteio import Writer +from .. import debug + +class LineNumberTableEntry: + def __init__(self, start_pc, line_number): + self.start_pc = start_pc + self.line_number = line_number + +class LocalVariableTableEntry: + def __init__(self, start_pc, length, name, descriptor, index): + self.start_pc = start_pc + self.length = length + self.name = name + self.descriptor = descriptor + self.index = index + +class LocalVariableTypeTableEntry: + def __init__(self, start_pc, length, name, signature, index): + self.start_pc = start_pc + self.length = length + self.name = name + self.signature = signature + self.index = index + +def translateAddr(addr, pos_map): + if addr < len(pos_map): + return pos_map[addr] + else: +# print("Debug info referenced dalvik instruction", str(addr), +# "which was removed during optimization. Using instruction number " + str(len(pos_map) - 1) + " instead.") + return len(pos_map) - 1 + +def endVariableAt(index, addr, tables, implicit=False): + found = 0 + for entry in tables: + if entry.index == index and entry.start_pc <= addr <= entry.start_pc + entry.length: + entry.length = addr - entry.start_pc + found += 1 +# if implicit: +# print(" Ended", entry.descriptor.decode("utf-8") if isinstance(entry, LocalVariableTableEntry) +# else entry.signature.decode("utf-8"), entry.name.decode("utf-8"), "implicitly") + return found + +class RegisterInfo: + def __init__(self, name, type, sig): + self.name = name + self.type = type + self.sig = sig + +def makeDebugTables(pool, irdata, pos_map, bytecode_len, regmap): # TODO: move in writeir? 
+ method_name = irdata.method.id.name.decode("utf-8") +# print("Translating debug info for method", method_name) + debug_info = irdata.method.code.debug_info + line_number_table = [] + local_variable_table = [] + local_variable_type_table = [] + + line = debug_info.line_start + addr = 0 + last_local = {} + for inst in debug_info.bytecode: + if inst.opcode == debug.DBG_END_SEQUENCE: + break + + elif inst.opcode == debug.DBG_ADVANCE_PC: + addr += inst.addr_diff + + elif inst.opcode == debug.DBG_ADVANCE_LINE: + line += inst.line_diff + + elif inst.opcode == debug.DBG_START_LOCAL: + taddr = translateAddr(addr, pos_map) +# print(" DBG_START_LOCAL", inst.type.decode("utf-8"), inst.name.decode("utf-8"), +# "(addr=" + str(addr), "taddr=" + str(taddr), "line=" + str(line) + ")") + found = False + for k, index in regmap.items(): + if(k[0] == inst.register_num): + endVariableAt(index, taddr, local_variable_table + local_variable_type_table, True) + local_variable_table.append(LocalVariableTableEntry(taddr, bytecode_len - taddr, inst.name, inst.type, index)) + found = True + if not found: + print("Register", inst.register_num, "in", method_name, "doesn't exist but had local variable info:", taddr, inst.type, inst.name) + last_local[inst.register_num] = RegisterInfo(inst.name, inst.type, None) + + elif inst.opcode == debug.DBG_START_LOCAL_EXTENDED: + taddr = translateAddr(addr, pos_map) +# print(" DBG_START_LOCAL_EXTENDED", inst.type.decode("utf-8") if inst.type is not None else None, +# "(sig=" + inst.sig.decode("utf-8") if inst.sig is not None else None + ")", inst.name.decode("utf-8"), +# "(addr=" + str(addr), "taddr=" + str(taddr), "line=" + str(line) + ")") + found = 0 + for k, index in regmap.items(): + if(k[0] == inst.register_num): + endVariableAt(index, taddr, local_variable_table + local_variable_type_table, True) + if inst.type is not None or inst.type is None and inst.sig is None: + entry = LocalVariableTableEntry(taddr, bytecode_len - taddr, inst.name, inst.type, 
index) + local_variable_table.append(entry) + if inst.sig is not None: + entry = LocalVariableTypeTableEntry(taddr, bytecode_len - taddr, inst.name, inst.sig, index) + local_variable_type_table.append(entry) + found += 1 + if not found: + print("Register", inst.register_num, "in", method_name, "doesn't exist but had extended local variable info:", taddr, inst.sig, inst.name) + last_local[inst.register_num] = RegisterInfo(inst.name, inst.type, inst.sig) + + elif inst.opcode == debug.DBG_END_LOCAL: + taddr = translateAddr(addr, pos_map) +# print(" DBG_END_LOCAL", inst.register_num, "(addr=" + str(addr), "taddr=" + str(taddr), "line=" + str(line) + ")") + found = 0 + for k, index in regmap.items(): + if(k[0] == inst.register_num): + found += endVariableAt(index, taddr, local_variable_table + local_variable_type_table) +# if not found: +# print("Tried to end non-existing local register_num =", inst.register_num) # Why is this happening? + + elif inst.opcode == debug.DBG_RESTART_LOCAL: +# taddr = translateAddr(addr, pos_map) +# print(" DBG_RESTART_LOCAL", inst.register_num, "(addr=" + str(addr), "taddr=" +# + str(taddr), "line=" + str(line), "last_local=" + str(last_local) + ")") + if inst.register_num in last_local: + taddr = translateAddr(addr, pos_map) + info = last_local[inst.register_num] + for k, index in regmap.items(): + if(k[0] == inst.register_num): + if endVariableAt(index, taddr, local_variable_table + local_variable_type_table): + pass +# print(" Restarting still existing variable!") # Why is this happening? 
+ if info.type is not None or info.type is None and info.sig is None: + entry = LocalVariableTableEntry(taddr, bytecode_len - taddr, info.name, info.type, index) + local_variable_table.append(entry) + if info.sig is not None: + entry = LocalVariableTypeTableEntry(taddr, bytecode_len - taddr, info.name, info.sig, index) + local_variable_type_table.append(entry) +# else: +# print("Tried to restart local register_num =", inst.register_num,"but no previous entry in that register!") # Why is this happening? + + elif inst.opcode == debug.DBG_SET_FILE: + #taddr = translateAddr(addr, pos_map) + #print(" DBG_RESTART_LOCAL", inst.name.decode("utf-8"), "(addr=" + str(addr), "taddr=" + str(taddr), "line=" + str(line) + ")") + print("Can't translate DBG_SET_FILE in method", irdata.method.id.name.decode("utf-8"), ", no JVM equivalent!") + + elif inst.opcode >= debug.DBG_FIRST_SPECIAL: + adjusted_opcode = inst.opcode - debug.DBG_FIRST_SPECIAL + line += -4 + (adjusted_opcode % 15) + addr += adjusted_opcode // 15 + entry = LineNumberTableEntry(translateAddr(addr, pos_map), line) + line_number_table.append(entry) + + if not len(line_number_table): + line_number_table = None + if not len(local_variable_table): + local_variable_table = None + if not len(local_variable_type_table): + local_variable_type_table = None + return line_number_table, local_variable_table, local_variable_type_table + +def writeDebugAttributes(pool, irdata, pos_map, bytecode_len, regmap): + stream = Writer() + attr_count = 0 + line_number_table, local_variable_table, local_variable_type_table = makeDebugTables(pool, irdata, pos_map, bytecode_len, regmap) + + if line_number_table is not None: + attr_count += 1 + stream.u16(pool.utf8(b"LineNumberTable")) + stream.u32(2 + len(line_number_table) * 4) # attribute length + stream.u16(len(line_number_table)) +# print("LNT for", irdata.method.id.name, "has", len(line_number_table), "entries: ") + for entry in line_number_table: +# print(" start_pc:", entry.start_pc, 
"line_number:", entry.line_number) + stream.u16(entry.start_pc) + stream.u16(entry.line_number) + + if local_variable_table is not None: + attr_count += 1 + stream.u16(pool.utf8(b"LocalVariableTable")) + stream.u32(2 + len(local_variable_table) * 10) # attribute length + stream.u16(len(local_variable_table)) + for entry in local_variable_table: + stream.u16(entry.start_pc) + stream.u16(entry.length) + stream.u16(pool.utf8(entry.name)) + stream.u16(pool.utf8(entry.descriptor)) + stream.u16(entry.index) + + if local_variable_type_table is not None: + attr_count += 1 + stream.u16(pool.utf8(b"LocalVariableTypeTable")) + stream.u32(2 + len(local_variable_type_table) * 10) # attribute length + stream.u16(len(local_variable_type_table)) + for entry in local_variable_type_table: + stream.u16(entry.start_pc) + stream.u16(entry.length) + stream.u16(pool.utf8(entry.name)) + stream.u16(pool.utf8(entry.signature)) + stream.u16(entry.index) + + return attr_count, stream.toBytes() diff --git a/enjarify/jvm/writeir.py b/enjarify/jvm/writeir.py index 92c474e..4453340 100644 --- a/enjarify/jvm/writeir.py +++ b/enjarify/jvm/writeir.py @@ -200,6 +200,9 @@ def __init__(self, pool, method, types, opts): self.target_pred_counts = collections.defaultdict(int) self.numregs = None # will be set once registers are allocated (see registers.py) + + self.pos_map = None # map between ir position and dalvik position + self.regmap = None # map between dalvik register and position def calcInitialArgs(self, nregs, scalar_ptypes): self.initial_args = args = [] @@ -221,15 +224,20 @@ def createBlock(self, instr): def flatten(self): instructions = [] + self.pos_map = [] + last_pos = 0 for pos in sorted(self.iblocks): if pos in self.exception_redirects: # check if we can put handler pop in front of block if instructions and not instructions[-1].fallsthrough(): instructions.append(self.exception_redirects.pop(pos)) instructions.append(ir.Pop()) + self.pos_map += [last_pos] * 2 # Is this the right block? 
# if not, leave it in dict to be redirected later # now add instructions for actual block instructions += self.iblocks[pos].instructions + self.pos_map += [pos] * (len(self.iblocks[pos].instructions)) + last_pos = pos # exception handler pops that couldn't be placed inline # in this case, just put them at the end with a goto back to the handler @@ -237,17 +245,25 @@ def flatten(self): instructions.append(self.exception_redirects[target]) instructions.append(ir.Pop()) instructions.append(ir.Goto(target)) + self.pos_map += [last_pos] * 3 # Is this the correct position for these? self.flat_instructions = instructions self.iblocks = self.exception_redirects = None + assert len(self.pos_map) == len(self.flat_instructions) def replaceInstrs(self, replace): + assert len(self.pos_map) == len(self.flat_instructions) if replace: instructions = [] - for instr in self.flat_instructions: - instructions.extend(replace.get(instr, [instr])) + pos_map = [] + for instr, dalvik_pos in zip(self.flat_instructions, self.pos_map): + new_instrs = replace.get(instr, [instr]) + instructions.extend(new_instrs) + pos_map += [dalvik_pos] * len(new_instrs) self.flat_instructions = instructions + self.pos_map = pos_map assert len(set(instructions)) == len(instructions) + assert len(self.pos_map) == len(self.flat_instructions) def calcUpperBound(self): # Get an uppper bound on the size of the bytecode diff --git a/enjarify/main.py b/enjarify/main.py index e4197fe..a56f08b 100644 --- a/enjarify/main.py +++ b/enjarify/main.py @@ -23,7 +23,7 @@ def read(fname, mode='rb'): with open(fname, mode) as f: return f.read() -def translate(data, opts, classes=None, errors=None, allowErrors=True): +def translate(data, opts, classes=None, errors=None, allowErrors=True, printNames=False, skipAndroid=False): dex = parsedex.DexFile(data) classes = collections.OrderedDict() if classes is None else classes errors = collections.OrderedDict() if errors is None else errors @@ -33,6 +33,12 @@ def translate(data, opts, 
classes=None, errors=None, allowErrors=True): if unicode_name in classes or unicode_name in errors: print('Warning, duplicate class name', unicode_name) continue + + if skipAndroid and unicode_name.startswith('android/'): + continue + + if printNames: + print(unicode_name) try: class_data = writeclass.toClassFile(cls, opts) @@ -61,6 +67,9 @@ def main(): parser.add_argument('-o', '--output', help='Output .jar file. Default is [input-filename]-enjarify.jar.') parser.add_argument('-f', '--force', action='store_true', help='Force overwrite. If output file already exists, this option is required to overwrite.') parser.add_argument('--fast', action='store_true', help='Speed up translation at the expense of generated bytecode being less readable.') + parser.add_argument('--debug', action='store_true', help='Translate debug info (line numbers and local variable names and types). Run with --fast to avoid losing information (variables may be deleted during optimization)') + parser.add_argument('--classnames', action='store_true', help='Print class names while translating') + parser.add_argument('--skipandroid', action='store_true', help='Skip classes in the android.* package') args = parser.parse_args() dexs = [] @@ -91,10 +100,12 @@ def main(): return opts = options.NONE if args.fast else options.PRETTY + if args.debug: + opts.translate_debug = True classes = collections.OrderedDict() errors = collections.OrderedDict() for data in dexs: - translate(data, opts=opts, classes=classes, errors=errors) + translate(data, opts=opts, classes=classes, errors=errors, printNames=args.classnames, skipAndroid=args.skipandroid) writeToJar(outfile, classes) outfile.close() print('Output written to', outname) diff --git a/enjarify/parsedex.py b/enjarify/parsedex.py index ab7a517..b805f72 100644 --- a/enjarify/parsedex.py +++ b/enjarify/parsedex.py @@ -13,6 +13,7 @@ # limitations under the License. 
from .byteio import Reader from .dalvik import parseBytecode +from .debug import parseDebugInfo from .util import signExtend NO_INDEX = 0xFFFFFFFF @@ -136,6 +137,17 @@ def finish(self, dex, list_off): if size <= 0: results.append((b'java/lang/Throwable', stream.uleb128())) +class DebugInfoItem: + def __init__(self, dex, offset): + stream = dex.stream(offset) + self.line_start = stream.uleb128() + self.parameters_size = stream.uleb128() + self.parameter_name_idxs = [stream.uleb128p1() for _ in range(self.parameters_size)] + self.parameter_names = [] + for idx in self.parameter_name_idxs: + self.parameter_names.append(dex.string(idx)) + self.bytecode = parseDebugInfo(dex, stream) + class CodeItem: def __init__(self, dex, offset): stream = dex.stream(offset) @@ -143,7 +155,7 @@ def __init__(self, dex, offset): ins_size = stream.u16() outs_size = stream.u16() tries_size = stream.u16() - debug_off = stream.u32() + debug_info_off = stream.u32() self.insns_size = stream.u32() insns_start_pos = stream.pos insns = [stream.u16() for _ in range(self.insns_size)] @@ -157,6 +169,7 @@ def __init__(self, dex, offset): catch_addrs = set() for tryi in self.tries: catch_addrs.update(t[1] for t in tryi.catches) + self.debug_info = DebugInfoItem(dex, debug_info_off) if debug_info_off else None self.bytecode = parseBytecode(dex, insns_start_pos, insns, catch_addrs) class Method: @@ -254,10 +267,11 @@ def __init__(self, data): def stream(self, offset): return Reader(self.raw, offset) def string(self, i): - data_off = self.stream(self.string_ids.off + i*4).u32() - stream = self.stream(data_off) - stream.uleb128() # ignore decoded length - return stream.readCStr() + if 0 <= i < NO_INDEX: + data_off = self.stream(self.string_ids.off + i*4).u32() + stream = self.stream(data_off) + stream.uleb128() # ignore decoded length + return stream.readCStr() def type(self, i): if 0 <= i < NO_INDEX: