Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Translate debug info #5

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,11 @@ By default, Enjarify will refuse to overwrite the output file if it already exis

### Why not dex2jar?

Dex2jar is an older tool that also tries to translate Dalvik to Java bytecode. It works reasonably well most of the time, but a lot of obscure features or edge cases will cause it to fail or even silently produce incorrect results. By contrast, Enjarify is designed to work in as many cases as possible, even for code where Dex2jar would fail. Among other things, Enjarify correctly handles Unicode class names, constants used as multiple types, implicit casts, exception handlers jumping into normal control flow, classes that reference too many constants, very long methods, exception handlers after a catchall handler, and static initial values of the wrong type.
Dex2jar is an older tool that also tries to translate Dalvik to Java bytecode. It works reasonably well most of the time, but a lot of obscure features or edge cases will cause it to fail or even silently produce incorrect results. By contrast, Enjarify is designed to work in as many cases as possible, even for code where Dex2jar would fail. Among other things, Enjarify correctly handles Unicode class names, constants used as multiple types, implicit casts, exception handlers jumping into normal control flow, classes that reference too many constants, very long methods, exception handlers after a catchall handler, and static initial values of the wrong type. Enjarify can also translate optional metadata such as sourcefile attributes, line numbers, and annotations.


### Limitations

Enjarify does not currently translate optional metadata such as sourcefile attributes, line numbers, and annotations.

Enjarify tries hard to successfully translate as many classes as possible, but there are some potential cases where it is simply not possible due to limitations in Android, Java, or both. Luckily, this only happens in contrived circumstances, so it shouldn't be a problem in practice.


Expand Down
1 change: 1 addition & 0 deletions enjarify/byteio.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def _leb128(self, signed=False):

def uleb128(self):
    """Read one unsigned LEB128 value via the shared ``_leb128`` decoder."""
    # _leb128 defaults to signed=False, so no argument is needed here.
    return self._leb128()
def sleb128(self):
    """Read one signed LEB128 value via the shared ``_leb128`` decoder."""
    return self._leb128(signed=True)
def uleb128p1(self):
    """Read an unsigned LEB128 value and return it minus one.

    An encoded 0 therefore comes back as -1.
    """
    encoded = self._leb128()
    return encoded - 1

# Maintain strings in binary encoding instead of attempting to decode them
# since the output will be using the same encoding anyway
Expand Down
66 changes: 66 additions & 0 deletions enjarify/debug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Opcodes of the debug-info bytecode decoded by DebugInstruction below.
# The trailing notes list the operands the decoder reads after each opcode.
DBG_END_SEQUENCE = 0x00  # no operands
DBG_ADVANCE_PC = 0x01  # uleb128 addr_diff
DBG_ADVANCE_LINE = 0x02  # sleb128 line_diff
DBG_START_LOCAL = 0x03  # uleb128 register_num, uleb128p1 name_idx, uleb128p1 type_idx
DBG_START_LOCAL_EXTENDED = 0x04  # as DBG_START_LOCAL, plus uleb128p1 sig_idx
DBG_END_LOCAL = 0x05  # uleb128 register_num
DBG_RESTART_LOCAL = 0x06  # uleb128 register_num
DBG_SET_PROLOGUE_END = 0x07  # no operands
DBG_SET_EPILOGUE_BEGIN = 0x08  # no operands
DBG_SET_FILE = 0x09  # uleb128p1 name_idx
# Not referenced by the decoder; presumably the lowest operand-less "special"
# opcode of the DEX format -- TODO confirm against the format specification.
DBG_FIRST_SPECIAL = 0x0A


class DebugInstruction:
    """A single decoded debug-info instruction.

    Construction consumes exactly one instruction from ``stream``: a one-byte
    opcode followed by the operands of that opcode. Only the attributes that
    belong to the decoded opcode are set on the instance:

    * DBG_ADVANCE_PC: ``addr_diff``
    * DBG_ADVANCE_LINE: ``line_diff``
    * DBG_START_LOCAL: ``register_num``, ``name_idx``, ``type_idx``,
      ``name``, ``type``
    * DBG_START_LOCAL_EXTENDED: the same, plus ``sig_idx`` and ``sig``
    * DBG_END_LOCAL / DBG_RESTART_LOCAL: ``register_num``
    * DBG_SET_FILE: ``name_idx``, ``name``
    * any other opcode: ``opcode`` only

    Index operands are read with ``uleb128p1`` and can therefore be -1,
    meaning "no index". The raw -1 is kept in the ``*_idx`` attribute, but the
    resolved ``name`` / ``type`` / ``sig`` is then None rather than the result
    of looking up a negative index in the dex file.

    Args:
        dex: object providing ``string(idx)`` and ``type(idx)`` lookups.
        stream: reader providing ``u8``, ``uleb128``, ``sleb128`` and
            ``uleb128p1``.
    """

    def __init__(self, dex, stream):
        opcode = self.opcode = stream.u8()
        if opcode == DBG_ADVANCE_PC:
            self.addr_diff = stream.uleb128()
        elif opcode == DBG_ADVANCE_LINE:
            self.line_diff = stream.sleb128()
        elif opcode in (DBG_START_LOCAL, DBG_START_LOCAL_EXTENDED):
            extended = opcode == DBG_START_LOCAL_EXTENDED
            # Read every operand first, then resolve, so the stream is fully
            # advanced past this instruction before any table lookup happens.
            self.register_num = stream.uleb128()
            self.name_idx = stream.uleb128p1()
            self.type_idx = stream.uleb128p1()
            if extended:
                self.sig_idx = stream.uleb128p1()

            self.name = self._lookup(dex.string, self.name_idx)
            self.type = self._lookup(dex.type, self.type_idx)
            if extended:
                self.sig = self._lookup(dex.string, self.sig_idx)
        elif opcode in (DBG_END_LOCAL, DBG_RESTART_LOCAL):
            self.register_num = stream.uleb128()
        elif opcode == DBG_SET_FILE:
            self.name_idx = stream.uleb128p1()
            self.name = self._lookup(dex.string, self.name_idx)
        # No operands are read for any other opcode.

    @staticmethod
    def _lookup(resolve, idx):
        """Return ``resolve(idx)``, or None when ``idx`` is the -1 "no index" marker."""
        return resolve(idx) if idx >= 0 else None

def parseDebugInfo(dex, stream):
    """Decode debug instructions from ``stream`` until the end marker.

    Instructions are decoded one after another; the returned list always
    ends with the DBG_END_SEQUENCE instruction that stopped the decoding.

    Args:
        dex: passed through to each DebugInstruction.
        stream: reader positioned at the first instruction opcode.

    Returns:
        List of DebugInstruction objects in stream order.
    """
    decoded = [DebugInstruction(dex, stream)]
    while decoded[-1].opcode != DBG_END_SEQUENCE:
        decoded.append(DebugInstruction(dex, stream))
    return decoded
15 changes: 12 additions & 3 deletions enjarify/jvm/optimization/jumps.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,24 @@ def optimizeJumps(irdata):
assert ins.min <= ins.max
ins.max = ins.min

def createBytecode(irdata, opts):
def createBytecode(irdata):
assert len(irdata.pos_map) == len(irdata.flat_instructions)
instrs = irdata.flat_instructions
posd, end_pos = _calcMinimumPositions(instrs)

pos_bytecode_map = [] # map each dalvik position -> ir -> to a bytecode offset
last_dalvik_pos = -1

bytecode = bytearray()
for ins in instrs:
for ins, dalvik_pos in zip(instrs, irdata.pos_map):
if isinstance(ins, (ir.LazyJumpBase, ir.Switch)):
ins.calcBytecode(posd, irdata.labels)
bytecode += ins.bytecode
if dalvik_pos != last_dalvik_pos:
pos_bytecode_map += [len(bytecode)] * (dalvik_pos - last_dalvik_pos)
last_dalvik_pos = dalvik_pos
#print(str(len(bytecode)))
assert len(pos_bytecode_map) == max(irdata.pos_map) + 1
assert len(bytecode) == end_pos


Expand Down Expand Up @@ -97,4 +106,4 @@ def createBytecode(irdata, opts):
print('Skipping zero width exception!')
assert 0

return bytes(bytecode), packed_excepts
return bytes(bytecode), packed_excepts, pos_bytecode_map
3 changes: 2 additions & 1 deletion enjarify/jvm/optimization/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
class Options:
def __init__(self, inline_consts=False, prune_store_loads=False,
copy_propagation=False, remove_unused_regs=False, dup2ize=False,
sort_registers=False, split_pool=False, delay_consts=False):
sort_registers=False, split_pool=False, delay_consts=False, translate_debug=False):
self.inline_consts = inline_consts
self.prune_store_loads = prune_store_loads
self.copy_propagation = copy_propagation
Expand All @@ -24,6 +24,7 @@ def __init__(self, inline_consts=False, prune_store_loads=False,
self.sort_registers = sort_registers
self.split_pool = split_pool
self.delay_consts = delay_consts
self.translate_debug = translate_debug

NONE = Options()
# Options which make the generated code more readable for humans
Expand Down
6 changes: 4 additions & 2 deletions enjarify/jvm/optimization/registers.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ def removeUnusedRegisters(irdata):
# For simplicity, parameter registers are preserved as is
def simpleAllocateRegisters(irdata):
instrs = irdata.flat_instructions
regmap = {v:i for i,v in enumerate(irdata.initial_args)}
nextreg = len(irdata.initial_args)
regmap = {v: i for i,v in enumerate(irdata.initial_args) if v != None}
nextreg = len(regmap)

for instr in instrs:
if isinstance(instr, ir.RegAccess):
Expand All @@ -172,6 +172,7 @@ def simpleAllocateRegisters(irdata):
nextreg += 1
instr.calcBytecode(regmap[instr.key])
irdata.numregs = nextreg
irdata.regmap = {k:v for k,v in regmap.items()} # keep track of original registers for debug info

# Sort registers by number of uses so that more frequently used registers will
# end up in slots 0-3 or 4-255 and benefit from the shorter instruction forms
Expand Down Expand Up @@ -229,3 +230,4 @@ def sortAllocateRegisters(irdata):
for instr in instrs:
if instr.bytecode is None and isinstance(instr, ir.RegAccess):
instr.calcBytecode(regmap[instr.key])
irdata.regmap = regmap
12 changes: 9 additions & 3 deletions enjarify/jvm/writebytecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

from ..byteio import Writer
from . import writeir, ir
from .optimization import registers, jumps, stack, consts
from .optimization import registers, jumps, stack, consts, options
from .writedebug import writeDebugAttributes

def getCodeIR(pool, method, opts):
if method.code is not None:
Expand Down Expand Up @@ -77,7 +78,7 @@ def finishCodeAttrs(pool, code_irs, opts):
def writeCodeAttributeTail(pool, irdata, opts):
method = irdata.method
jumps.optimizeJumps(irdata)
bytecode, excepts = jumps.createBytecode(irdata, opts)
bytecode, excepts, pos_map = jumps.createBytecode(irdata)

stream = Writer()
# For simplicity, don't bother calculating the actual maximum stack height
Expand All @@ -95,5 +96,10 @@ def writeCodeAttributeTail(pool, irdata, opts):
stream.write(b''.join(excepts))

# attributes
stream.u16(0)
if opts.translate_debug and method.code.debug_info != None:
attr_count, attrs = writeDebugAttributes(pool, irdata, pos_map, len(bytecode), irdata.regmap)
stream.u16(attr_count)
stream.write(attrs)
else:
stream.u16(0)
return stream
34 changes: 26 additions & 8 deletions enjarify/jvm/writeclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,33 @@ def writeField(pool, stream, field):
else:
stream.u16(0) # no attributes

def writeMethod(pool, stream, method, code_attr_data):
def writeMethod(pool, stream, method, code_attr_data, opts):
stream.u16(method.access & flags.METHOD_FLAGS)
stream.u16(pool.utf8(method.id.name))
stream.u16(pool.utf8(method.id.desc))


code_attr = 0
param_attr = 0
if code_attr_data is not None:
code_attr = 1
if opts.translate_debug and method.code != None and method.code.debug_info != None and len(method.code.debug_info.parameter_names):
param_attr = 1
stream.u16(code_attr + param_attr)
if code_attr:
code_attr_data = code_attr_data.toBytes()
stream.u16(1)
stream.u16(pool.utf8(b"Code"))
stream.u32(len(code_attr_data))
stream.write(code_attr_data)
else:
stream.u16(0) # no attributes
if param_attr:
stream.u16(pool.utf8(b"MethodParameters"))
stream.u32(1 + len(method.code.debug_info.parameter_names) * 4)
stream.u8(len(method.code.debug_info.parameter_names))
for name in method.code.debug_info.parameter_names:
if name != None:
stream.u16(pool.utf8(name))
else:
stream.u16(0)
stream.u16(0) # dex doesn't have access_flags

def writeMethods(pool, stream, methods, opts):
code_irs = []
Expand All @@ -66,7 +80,7 @@ def writeMethods(pool, stream, methods, opts):

stream.u16(len(methods))
for method in methods:
writeMethod(pool, stream, method, code_attrs.get(method))
writeMethod(pool, stream, method, code_attrs.get(method), opts)

def classFileAfterPool(cls, opts):
stream = Writer()
Expand Down Expand Up @@ -115,8 +129,12 @@ def toClassFile(cls, opts):
try:
pool, rest_stream = classFileAfterPool(cls, opts=opts)
except error.ClassfileLimitExceeded:
# print('Retrying {} with optimization enabled'.format(cls.name))
pool, rest_stream = classFileAfterPool(cls, opts=options.ALL)
print('Retrying {} with optimization enabled'.format(cls.name))
newopts = options.ALL
if opts.translate_debug:
newopts.translate_debug = True
newopts.sort_registers = False # breaks translate debug for some reason
pool, rest_stream = classFileAfterPool(cls, opts=newopts)

# write constant pool
pool.write(stream)
Expand Down
Loading