diff --git a/engine/README.md b/engine/README.md index 2003740..ec0c6e8 100644 --- a/engine/README.md +++ b/engine/README.md @@ -79,10 +79,14 @@ If you're curious, this is how the `run.py` script works. Study the source code ## Bytecode Instrumentation +Helpful resource: https://towardsdatascience.com/understanding-python-bytecode-e7edaae8734d + The engine counts the number of bytecodes used by code. It does this by inserting a function call, `__increment__()`, in between every single bytecode (each function call requires 3 bytecodes, so this increases the code size by 4x). Python bytecode post-3.6 consists of two bytes each. The first byte corresponds to the instruction, and the second byte to the argument. In case the argument needs to be bigger than one byte, additional `EXTENDED_ARG` are inserted before, containing the higher bits of the argument. At most 3 of them are allowed per instruction. The bytecodes operate on a stack machine. That is, most operations are performed on the top x stack elements. Here is a list of all bytecode instructions: https://docs.python.org/3/library/dis.html#python-bytecode-instructions. -The bytecode instrumentation is not perfect, because many of the bytecodes are not constant time. For example, `BUILD_STRING(i)` concatenates `i` strings, which should probably cost `i` bytecodes and not 1 bytecode. But meh, seems relatively benign. \ No newline at end of file +The bytecode instrumentation is not perfect, because many of the bytecodes are not constant time. For example, `BUILD_STRING(i)` concatenates `i` strings, which should probably cost `i` bytecodes and not 1 bytecode. But meh, seems relatively benign. + +Note: `is_jump_target` is never set on the extended args, but always on the root instruction. However, the jump target is the extended args instruction. \ No newline at end of file diff --git a/engine/malthusia/engine/__init__.py b/engine/malthusia/engine/__init__.py index 8ef3c92..73b1829 100644 --- a/engine/malthusia/engine/__init__.py +++ b/engine/malthusia/engine/__init__.py @@ -1,2 +1,13 @@ +import logging +import os + from .game import Game, BasicViewer, GameConstants -from .container import CodeContainer \ No newline at end of file +from .container import CodeContainer + +logger = logging.getLogger(__name__) +logger.setLevel(level=os.environ.get("LOGLEVEL", logging.WARNING)) +ch = logging.StreamHandler() +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +ch.setLevel(level=logging.DEBUG) +ch.setFormatter(formatter) +logger.addHandler(ch) diff --git a/engine/malthusia/engine/container/instruction.py b/engine/malthusia/engine/container/instruction.py index 261624a..af6c522 100644 --- a/engine/malthusia/engine/container/instruction.py +++ b/engine/malthusia/engine/container/instruction.py @@ -2,14 +2,28 @@ from types import SimpleNamespace class Instruction(SimpleNamespace): - def __init__(self, instruction, in_dict=None): - if in_dict is not None: - super().__init__(**in_dict) + def __init__(self, instruction, original: bool): + vals = {a: b for a,b in zip(dis.Instruction._fields, instruction)} + vals["orig_offset"] = vals["offset"] + vals["original"] = original + assert((original and vals["offset"] is not None) or (not original and vals["offset"] is None)) + vals["orig_jump_target_offset"] = None + super().__init__(**vals) + + def calculate_orig_jump_target_offset(self): + assert(self.offset is not None) + assert(self.original) + assert(self.is_jumper()) + assert(self.offset == self.orig_offset) + if self.is_rel_jumper(): + self.orig_jump_target_offset = self.orig_offset + self.arg + 2 + elif self.is_abs_jumper(): + self.orig_jump_target_offset = self.arg else: - super().__init__(**{a:b for a,b in zip(dis.Instruction._fields+('jump_to', 'was_there', 'extra_extended_args'), instruction + (None, True, 0))}) + assert(False) def is_jumper(self): - return self.is_abs_jumper() or self.is_rel_jumper() + return self.is_rel_jumper() or self.is_abs_jumper() def is_rel_jumper(self): return self.opcode in dis.hasjrel @@ -17,22 +31,32 @@ def is_rel_jumper(self): def is_abs_jumper(self): return self.opcode in dis.hasjabs + def is_extended_arg(self): + return self.opcode == dis.opmap["EXTENDED_ARG"] + @classmethod - def ExtendedArgs(self, value): - return Instruction(None, in_dict={ - 'opcode':144, 'opname':'EXTENDED_ARGS', 'arg':value, - 'argval':value, 'argrepr':value, 'offset':None, - 'starts_line':None, 'is_jump_target':False, 'was_there': False, - 'extra_extended_args': 0, - }) - - def calculate_offset(self, instructions): - # Return the offset (rel or abs) to self.jump_to in instructions - target_loc = 2 * instructions.index(self.jump_to) - 2 * self.jump_to.extra_extended_args + def ExtendedArgs(self): + return Instruction(dis.Instruction( + opcode=dis.opmap["EXTENDED_ARG"], + opname='EXTENDED_ARG', + arg=1, + argval=1, + argrepr="", + offset=None, + starts_line=None, + is_jump_target=False + ), original=False) + + def calculate_jump_arg(self, orig_to_curr_offset): + assert(self.is_jumper()) + assert(self.orig_jump_target_offset is not None) + assert(self.orig_jump_target_offset in orig_to_curr_offset) + + target_offset = orig_to_curr_offset[self.orig_jump_target_offset] if self.is_abs_jumper(): - return target_loc - - self_loc = 2 * instructions.index(self) - - return target_loc - self_loc - 2 + return target_offset + elif self.is_rel_jumper(): + return target_offset - self.offset - 2 + else: + assert(False) \ No newline at end of file diff --git a/engine/malthusia/engine/container/instrument.py b/engine/malthusia/engine/container/instrument.py index ebb15d8..e83cbd5 100644 --- a/engine/malthusia/engine/container/instrument.py +++ b/engine/malthusia/engine/container/instrument.py @@ -1,8 +1,12 @@ import dis import math +import logging +import os from types import CodeType from .instruction import Instruction +logger = logging.getLogger(__name__) + class Instrument: """ A class for instrumenting specific methods (e.g. sort) as well as instrumenting competitor code @@ -39,171 +43,221 @@ def instrument(bytecode): new_consts.append(constant) new_consts = tuple(new_consts) + logger.debug(dis.Bytecode(bytecode).dis()) + instructions = list(dis.get_instructions(bytecode)) + orig_linestarts = list(dis.findlinestarts(bytecode)) + function_name_index = len(bytecode.co_names) # we will be inserting our __instrument__ call at the end of co_names # the injection, which consists of a function call to an __instrument__ method which increments bytecode # these three instructions will be inserted between every line of instrumented code injection = [ - dis.Instruction(opcode=116, opname='LOAD_GLOBAL', arg=function_name_index%256, argval='__instrument__', argrepr='__instrument__', offset=None, starts_line=None, is_jump_target=False), + dis.Instruction(opcode=116, opname='LOAD_GLOBAL', arg=function_name_index, argval='__instrument__', argrepr='__instrument__', offset=None, starts_line=None, is_jump_target=False), dis.Instruction(opcode=131, opname='CALL_FUNCTION', arg=0, argval=0, argrepr=0, offset=None, starts_line=None, is_jump_target=False), dis.Instruction(opcode=1, opname='POP_TOP', arg=None, argval=None, argrepr=None, offset=None, starts_line=None, is_jump_target=False) ] + injection = [Instruction(inst, original=False) for inst in injection] # extends the opargs so that it can store the index of __instrument__ inserted_extended_args = 0 - while function_name_index > 255: #(255 = 2^8 -1 = 1 oparg) + function_name_index >>= 8 + while function_name_index > 0: #(255 = 2^8 -1 = 1 oparg) if inserted_extended_args >= 3: # we can only insert 3! so abort! raise SyntaxError("Too many extended_args wanting to be inserted; possibly too many co_names (more than 2^32).") - function_name_index >>= 8 injection = [ - dis.Instruction( - opcode=dis.opmap["EXTENDED_ARG"], - opname='EXTENDED_ARG', - arg=function_name_index%256, - argval=function_name_index%256, - argrepr=function_name_index%256, - offset=None, - starts_line=None, - is_jump_target=False - ) + Instruction.ExtendedArgs() ] + injection + inserted_extended_args += 1 + function_name_index >>= 8 # convert every instruction into our own instruction format, which adds a couple of fields. for i, instruction in enumerate(instructions): - instructions[i] = Instruction(instruction) + instructions[i] = Instruction(instruction, original=True) - # Next, we cache a reference to the jumpers to each jump target in the targets + # now compute jump offset for every jumper for i, instruction in enumerate(instructions): - # We're only looking for jumpers - if not instruction.is_jumper(): - continue - - # TODO: shouldn't target here depend on whether it is an absolute or relative jumper? - # yep, this should definitely not work for relative jumps - # for both of them, however, we need to do some fancy keeping track of to keep track of them - target = [t for t in instructions if instruction.argval == t.offset][0] - instruction.jump_to = target - - # If any targets jump to themselves, that's not kosher. - if instruction == target: - raise SyntaxError('No self-referential loops.') - - unsafe = {110, 113, 114, 115, 116, 120, 124, 125, 131} # bytecode ops that break the instrument + if instruction.is_jumper(): + instruction.calculate_orig_jump_target_offset() + logger.debug(f"instr {instruction.offset} orig jump target offset: {instruction.orig_jump_target_offset}") # We then inject the injection before every call, except for those following an EXTENDED_ARGS. - cur_index = -1 - for (cur, last) in zip(instructions[:], [None]+instructions[:-1]): - cur_index += 1 - if last is not None and last.opcode == dis.opmap["EXTENDED_ARG"]: - continue + new_instructions = [] + for (cur, last) in zip(instructions, [None]+instructions[:-1]): - if last is not None and last.opcode in unsafe: + if last is not None and last.is_extended_arg(): + new_instructions.append(cur) continue - for j, inject in enumerate(injection): - injected_instruction = Instruction(inject) - injected_instruction.was_there = False # keeping track of the instructions added by us - instructions.insert(cur_index + j, injected_instruction) - cur_index += len(injection) + for inject in injection: + new_instructions.append(inject) + + new_instructions.append(cur) + instructions = new_instructions # Iterate through instructions. If it's a jumper, calculate the new correct offset. For each new offset, if it # is too large to fit in the current number of EXTENDED_ARGS, inject a new EXTENDED_ARG before it. If you never # insert a new EXTENDED_ARGS, break out of the loop. fixed = False while not fixed: + logger.debug("trying to fix the instruction jumps") fixed = True - i = 0 - for instruction in instructions[:]: - instruction.offset = 2 * i + # calculate new offsets + for i, instruction in enumerate(instructions): + instruction.offset = 2*i - if not instruction.is_jumper(): - i += 1 + # calculate map from orig_offset to cur_offset + orig_to_curr_offset = {} + for i, instruction in enumerate(instructions): + if not instruction.original: continue + cur_offset = instruction.offset + for prev_instr in instructions[max(i-3,0):i][::-1]: + if prev_instr.is_extended_arg(): + cur_offset -= 2 + assert(cur_offset == prev_instr.offset) + else: + break + orig_to_curr_offset[instruction.orig_offset] = cur_offset - correct_offset = instruction.calculate_offset(instructions) - instruction.arg = correct_offset % 256 - correct_offset >>= 8 + # now transform each jumper's argument to point to the cur offset instead of the orig offset + new_instructions = [] + cur_extended_args = [] + for instruction in instructions: + if instruction.is_extended_arg(): + cur_extended_args.append(instruction) + continue + if not instruction.is_jumper(): + new_instructions.extend(cur_extended_args) + new_instructions.append(instruction) + cur_extended_args = [] + continue - extended_args = 0 - while correct_offset > 0: - # Check if there is already an EXTENDED_ARGS behind - if i > extended_args and instructions[i - extended_args - 1].opcode == 144: - instructions[i - extended_args - 1].arg = correct_offset % 256 + real_arg = instruction.calculate_jump_arg(orig_to_curr_offset) + instruction.arg = real_arg - # Otherwise, insert a new one + real_arg >>= 8 + cur_extended_args_i = len(cur_extended_args) - 1 + while real_arg > 0: + if cur_extended_args_i > -1: + cur_extended_args_i -= 1 else: - instructions.insert(i, Instruction.ExtendedArgs(correct_offset % 256)) - instruction.extra_extended_args += 1 - i += 1 + cur_extended_args = [Instruction.ExtendedArgs()] + cur_extended_args + # this causes us to have to redo everything again fixed = False + real_arg >>= 8 + assert(cur_extended_args_i == -1) # we may never decrease the offsets, or something went very wrong (we only add instructions, which should monotonically increase offsets) + assert(len(cur_extended_args) <= 3) # max 3 extended args - correct_offset >>= 8 - extended_args += 1 - i += 1 - #Maintaining correct line info ( traceback bug fix) - #co_lnotab stores line information in Byte form - # It stores alterantively, the number of instructions to the next increase in line number and - # the increase in line number then - #We need to ensure that these are bytes (You might want to break an increase into two see the article or code below) - #The code did not update these bytes, we need to update the number of instructions before the beginning of each line - #It should be similar to the way the jump to statement were fixed, I tried to mimick them but failed, I feel like I do not inderstand instruction.py - # I am overestimating the number of instructions before the start of the line in this fix - # you might find the end of this article helpful: https://towardsdatascience.com/understanding-python-bytecode-e7edaae8734d - old_lnotab = {} #stores the old right info in a more usefull way (maps instruction num to line num) - i = 0 - line_num = 0 #maintains line number by adding differences - instruction_num = 0 #maintains the instruction num by addind differences - while 2*i < len(bytecode.co_lnotab): - instruction_num += bytecode.co_lnotab[2 * i] - line_num += bytecode.co_lnotab[2 * i + 1] - old_lnotab[instruction_num] = line_num - i += 1 - #Construct a map from old instruction numbers, to new ones. - num_injected = 0 - instruction_index = 0 - old_to_new_instruction_num = {} - for instruction in instructions: - if instruction.was_there: - old_to_new_instruction_num[2 * (instruction_index - num_injected)] = 2 * instruction_index - instruction_index += 1 - if not instruction.was_there: - num_injected += 1 - new_lnotab = {} - for key in old_lnotab: - new_lnotab[old_to_new_instruction_num[key]] = old_lnotab[key] - - #Creating a differences list of integers, while ensuring integers in it are bytes - pairs = sorted(new_lnotab.items()) + new_instructions.extend(cur_extended_args) + new_instructions.append(instruction) + cur_extended_args = [] + + assert(len(cur_extended_args)==0) + instructions = new_instructions + + # calculate new offsets + for i, instruction in enumerate(instructions): + instruction.offset = 2*i + + # calculate map from orig_offset to cur_offset + orig_to_curr_offset = {} + for i, instruction in enumerate(instructions): + if not instruction.original: + continue + cur_offset = instruction.offset + for prev_instr in instructions[max(i-3,0):i][::-1]: + if prev_instr.is_extended_arg(): + cur_offset -= 2 + assert(cur_offset == prev_instr.offset) + else: + break + orig_to_curr_offset[instruction.orig_offset] = cur_offset + + logger.debug(f"near-final instructions: {instructions}") + + # translate line numbers into curr offset + # this algorithm deduced from https://github.com/python/cpython/blob/3.9/Objects/lnotab_notes.txt#L56 + curr_linestarts = [(orig_to_curr_offset[offset], lineno) for offset, lineno in orig_linestarts] new_lnotab = [] - previous_pair = (0, 0) - for pair in pairs: - num_instructions = pair[0] - previous_pair[0] - num_lines = pair[1] - previous_pair[1] - while num_instructions > 127: - new_lnotab.append(127) - new_lnotab.append(0) - num_instructions -= 127 - new_lnotab.append(num_instructions) - while num_lines > 127: - new_lnotab.append(127) - new_lnotab.append(0) - num_lines -= 127 - new_lnotab.append(num_lines) - previous_pair = pair - #tranfer to bytes and we are good :) + if len(curr_linestarts) > 0: + logger.debug(f"orig linestarts: {orig_linestarts}") + logger.debug(f"lnotab: {[int(x) for x in bytecode.co_lnotab]}") + logger.debug(f"first line: {bytecode.co_firstlineno}") + if curr_linestarts[0][1] != bytecode.co_firstlineno: + curr_linestarts = [(0,bytecode.co_firstlineno)] + curr_linestarts + for cur, last in zip(curr_linestarts[1:],curr_linestarts[:-1]): + bytesdiff = cur[0] - last[0] + linediff = cur[1] - last[1] + while bytesdiff > 255: + new_lnotab += [255, 0] + bytesdiff -= 255 + if linediff >= 0: + while linediff > 127: + new_lnotab += [bytesdiff, 127] + linediff -= 127 + bytesdiff = 0 + if linediff > 0 or bytesdiff > 0: + new_lnotab += [bytesdiff, linediff] + else: + while linediff < -128: + new_lnotab += [bytesdiff, -128] + linediff -= -128 + bytesdiff = 0 + if linediff < 0 or bytesdiff > 0: + new_lnotab += [bytesdiff, linediff] + else: + assert(len(bytecode.co_lnotab) == 0) new_lnotab = bytes(new_lnotab) + + # convert args into 256-space + new_instructions = [] + cur_extended_args = [] + for instruction in instructions: + if instruction.is_extended_arg(): + cur_extended_args.append(instruction) + continue + if instruction.arg is None: + assert(len(cur_extended_args) == 0) + new_instructions.append(instruction) + continue + + real_arg = instruction.arg + logger.debug(f"real arg: {real_arg}") + logger.debug(f"cur extended args: {cur_extended_args}") + instruction.arg = real_arg % 256 + real_arg >>= 8 + + cur_extended_args_i = len(cur_extended_args) - 1 + while real_arg > 0: + assert(cur_extended_args_i > -1) + # modify the existing extended args + cur_extended_args[cur_extended_args_i].arg = real_arg % 256 + cur_extended_args_i -= 1 + real_arg >>= 8 + assert(cur_extended_args_i == -1) # we may never decrease the offsets, or something went very wrong (we only add instructions, which should monotonically increase offsets) + assert(len(cur_extended_args) <= 3) # max 3 extended args + + new_instructions.extend(cur_extended_args) + new_instructions.append(instruction) + cur_extended_args = [] + + assert(len(cur_extended_args)==0) + instructions = new_instructions + # Finally, we repackage up our instructions into a byte string and use it to build a new code object - byte_array = [[inst.opcode, 0 if inst.arg is None else inst.arg % 256] for inst in instructions] + assert(all([inst.arg is None or (0 <= inst.arg and inst.arg < 256) for inst in instructions])) + byte_array = [[inst.opcode, 0 if inst.arg is None else inst.arg] for inst in instructions] new_code = bytes(sum(byte_array, [])) # Make sure our code can locate the __instrument__ call new_names = tuple(bytecode.co_names) + ('__instrument__', ) + # return Instrument.build_code(bytecode, new_code, new_names, new_consts, new_lnotab) return Instrument.build_code(bytecode, new_code, new_names, new_consts, new_lnotab) @staticmethod