From 64f97685d3cbd887bb72815cdd927b3e32804e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Mei=C3=9Fner?= Date: Wed, 25 Sep 2024 08:35:54 +0200 Subject: [PATCH] Adds tail call optimization in ANCHOR_ANCHOR_INTERNAL_FUNCTION_CALL_REG. --- src/jit.rs | 23 ++++++++++------------- src/x86.rs | 13 +++++++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/jit.rs b/src/jit.rs index 37064327..0a618260 100644 --- a/src/jit.rs +++ b/src/jit.rs @@ -1044,17 +1044,9 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> { match dst { Value::Register(reg) => { - // Move guest_target_address into RAX - self.emit_ins(X86Instruction::push(REGISTER_MAP[0], None)); - if reg != REGISTER_MAP[0] { - self.emit_ins(X86Instruction::mov(OperandSize::S64, reg, REGISTER_MAP[0])); - } - + // Move guest_target_address into REGISTER_MAP[FRAME_PTR_REG] + self.emit_ins(X86Instruction::mov(OperandSize::S64, reg, REGISTER_MAP[FRAME_PTR_REG])); self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(ANCHOR_ANCHOR_INTERNAL_FUNCTION_CALL_REG, 5))); - - self.emit_ins(X86Instruction::mov(OperandSize::S64, REGISTER_MAP[0], REGISTER_OTHER_SCRATCH)); - self.emit_ins(X86Instruction::pop(REGISTER_MAP[0])); // Restore RAX - self.emit_ins(X86Instruction::call_reg(REGISTER_OTHER_SCRATCH, None)); // callq *REGISTER_OTHER_SCRATCH }, Value::Constant64(target_pc, user_provided) => { debug_assert!(user_provided); @@ -1482,9 +1474,11 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> { self.emit_ins(X86Instruction::return_near()); // Routine for emit_internal_call(Value::Register()) - // Inputs: Guest current pc in REGISTER_SCRATCH, Guest target address in REGISTER_MAP[0] - // Outputs: Guest target pc in REGISTER_SCRATCH, Host target address in REGISTER_MAP[0] + // Inputs: Guest current pc in REGISTER_SCRATCH, Guest target address in REGISTER_MAP[FRAME_PTR_REG] + // Outputs: Guest target pc in REGISTER_SCRATCH, Host target address in RIP 
self.set_anchor(ANCHOR_ANCHOR_INTERNAL_FUNCTION_CALL_REG); + self.emit_ins(X86Instruction::push(REGISTER_MAP[0], None)); + self.emit_ins(X86Instruction::mov(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], REGISTER_MAP[0])); // Calculate offset relative to program_vm_addr self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], self.program_vm_addr as i64)); self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x29, REGISTER_MAP[FRAME_PTR_REG], REGISTER_MAP[0], 0, None)); // guest_target_address -= self.program_vm_addr; @@ -1513,7 +1507,10 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> { self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_MAP[0], REGISTER_MAP[0], X86IndirectAccess::Offset(0))); // host_target_address = self.result.pc_section[host_target_address / 8]; // Load the frame pointer again since we've clobbered REGISTER_MAP[FRAME_PTR_REG] self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[FRAME_PTR_REG], stack_pointer_access)); - self.emit_ins(X86Instruction::return_near()); + // Restore the clobbered REGISTER_MAP[0] + self.emit_ins(X86Instruction::mov(OperandSize::S64, REGISTER_MAP[0], REGISTER_OTHER_SCRATCH)); + self.emit_ins(X86Instruction::pop(REGISTER_MAP[0])); + self.emit_ins(X86Instruction::jump_reg(REGISTER_OTHER_SCRATCH, None)); // Tail call to host_target_address // Translates a vm memory address to a host memory address for (access_type, len) in &[ diff --git a/src/x86.rs b/src/x86.rs index eaaa1384..b592f475 100644 --- a/src/x86.rs +++ b/src/x86.rs @@ -613,6 +613,19 @@ impl X86Instruction { } } + /// Jump to absolute destination + #[inline] + pub const fn jump_reg(destination: u8, indirect: Option<X86IndirectAccess>) -> Self { + Self { + size: OperandSize::S64, + opcode: 0xff, + first_operand: 4, + second_operand: destination, + indirect, + ..Self::DEFAULT + } + } + /// Push RIP and jump to relative destination #[inline] pub const fn call_immediate(relative_destination: i32) -> Self {