diff --git a/crates/interpreter/src/lib.rs b/crates/interpreter/src/lib.rs index 665318a..99adc7d 100644 --- a/crates/interpreter/src/lib.rs +++ b/crates/interpreter/src/lib.rs @@ -1,299 +1,225 @@ -pub mod machine; - -#[cfg(test)] -mod tests { - use machine::Machine; - use sonatina_ir::{module::FuncRef, Immediate, Module, Type}; - - use super::*; +use cranelift_entity::SecondaryMap; +use sonatina_ir::{ + interpret::{Action, EvalValue, Interpret, State}, + isa::Endian, + module::FuncRef, + prelude::*, + BlockId, DataFlowGraph, Function, Immediate, InstId, Module, Type, Value, ValueId, I256, +}; + +pub struct Machine { + frames: Vec, + pc: InstId, + action: Action, + pub module: Module, + memory: Vec, +} - fn parse_module(input: &str) -> Module { - match sonatina_parser::parse_module(input) { - Ok(pm) => pm.module, - Err(errs) => { - for err in errs { - eprintln!("{}", err.print_to_string("[test]", input, true)); - } - panic!("parsing failed"); - } +impl Machine { + pub fn new(module: Module) -> Self { + Self { + frames: Vec::new(), + // Dummy pc + pc: InstId(0), + action: Action::Continue, + module, + memory: Vec::new(), } } - fn setup(input: &str) -> (Machine, Vec) { - let module = parse_module(input); - let funcs = module.iter_functions().collect(); - (Machine::new(module), funcs) + pub fn run(&mut self, func: FuncRef, args: Vec) -> EvalValue { + let frame = Frame::new(func, &self.module, args); + self.frames.push(frame); + self.action = Action::Continue; + self.run_on_func() } - #[test] - fn unary() { - let input = " - target = \"evm-ethereum-london\" + pub fn clear_state(&mut self) { + self.frames.clear(); + self.memory.clear(); + } - func private %test() -> i32 { - block0: - v1.i32 = not 0.i32; - v2.i32 = neg v1; - return v2; - } - "; + fn top_frame(&self) -> &Frame { + self.frames.last().unwrap() + } - let (mut machine, funcs) = setup(input); - let result = machine.run(funcs[0], vec![]); + fn top_frame_mut(&mut self) -> &mut Frame { + 
self.frames.last_mut().unwrap() + } - assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i32(), 1); + fn top_func(&self) -> &Function { + &self.module.funcs[self.top_frame().func] } - #[test] - fn binary_arithmetic() { - let input = " - target = \"evm-ethereum-london\" - - func private %test() -> i16 { - block0: - v0.i16 = add 3.i16 4.i16; - v1.i16 = sub v0 1.i16; - v2.i16 = udiv v1 2.i16; - v3.i16 = sdiv v2 65535.i16; - return v3; - } - "; + fn run_on_func(&mut self) -> EvalValue { + let layout = &self.top_func().layout; - let (mut machine, funcs) = setup(input); - let result = machine.run(funcs[0], vec![]); + let entry_block = layout.entry_block().unwrap(); + self.pc = layout.first_inst_of(entry_block).unwrap(); - assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i16(), -3); - } + loop { + let inst = self.top_func().dfg.inst(self.pc); + let Some(interpretable): Option<&dyn Interpret> = + InstDowncast::downcast(self.top_func().inst_set(), inst) + else { + panic!("`Interpret` is not yet implemented for `{}`", inst.as_text()); + }; - #[test] - fn cast_sext() { - let input = " - target = \"evm-ethereum-london\" + let e_val = interpretable.interpret(self); + if let Some(inst_result) = self.top_func().dfg.inst_result(self.pc) { + self.top_frame_mut().map_val(inst_result, e_val); + }; - func private %test() -> i16 { - block0: - v0.i16 = sext -128.i8 i16; - return v0; - } - "; + match self.action { + Action::Continue => { + self.pc = self.top_func().layout.next_inst_of(self.pc).unwrap(); + } - let (mut machine, funcs) = setup(input); - let result = machine.run(funcs[0], vec![]); + Action::JumpTo(next_block) => { + let current_block = self.top_func().layout.inst_block(self.pc); + self.top_frame_mut().prev_block = Some(current_block); + self.pc = self.top_func().layout.first_inst_of(next_block).unwrap(); + } - assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i16(), -128); + Action::FallThrough => { + panic!("fall through detected!") + } + + 
Action::Return(e_val) => return e_val, + } + } } +} - #[test] - fn cast_zext() { - let input = " - target = \"evm-ethereum-london\" +pub struct Frame { + func: FuncRef, + locals: SecondaryMap, + prev_block: Option, +} + +impl Frame { + fn new(func: FuncRef, module: &Module, arg_e_values: Vec) -> Self { + let arg_values = &module.funcs[func].arg_values; + assert_eq!(arg_values.len(), arg_e_values.len()); + + let mut frame = Self { + func, + locals: SecondaryMap::default(), + prev_block: None, + }; - func private %test() -> i16 { - block0: - v0.i16 = zext -128.i8 i16; - return v0; + for (arg_val, arg_e_val) in arg_values.iter().zip(arg_e_values.into_iter()) { + frame.map_val(*arg_val, arg_e_val); } - "; - let (mut machine, funcs) = setup(input); - let result = machine.run(funcs[0], vec![]); + frame + } - assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i16(), 128); + fn map_val(&mut self, val: ValueId, e_val: EvalValue) { + self.locals[val] = e_val; } +} - // // TODO: uncomment this when issue https://github.com/fe-lang/sonatina/issues/74 is resolved. 
- // // #[test] - // // fn load_store() { - // // let input = " - // // target = \"evm-ethereum-london\" - - // // func private %test() -> i32 { - // // block0: - // // mstore 0.*i32 1.i32 i32; - // // v1.i32 = load 0.*i32 i32; - // // return v1; - // // } - // // "; - - // // let (mut machine, func) = setup(input); - // // let result = machine.run(func, vec![]); - - // // assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i32(), 128); - // // } - - // #[test] - // fn call() { - // let input = " - // target = \"evm-ethereum-london\" - - // func public %test_callee(v0.i8) -> i8 { - // block0: - // v1.i8 = mul v0 2.i8; - // return v1; - // } - - // func public %test() -> i8 { - // block0: - // v0.i8 = call %test_callee 3.i8; - // return v0; - // } - // "; - - // let (mut machine, funcs) = setup(input); - // let result = machine.run(funcs[1], vec![]); - - // assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i8(), 6); - // } - - // #[test] - // fn jump() { - // let input = " - // target = \"evm-ethereum-london\" - - // func private %test() -> i1 { - // block0: - // jump block2; - // block1: - // return 1.i1; - // block2: - // return 0.i1; - // } - // "; - - // let (mut machine, funcs) = setup(input); - // let result = machine.run(funcs[0], vec![]); - - // assert!(!result.as_imm().unwrap().as_i256().trunc_to_i1()); - // } - - // #[test] - // fn branch() { - // let input = " - // target = \"evm-ethereum-london\" - - // func private %test() -> i8 { - // block0: - // br 1.i1 block1 block2; - // block1: - // return 1.i8; - // block2: - // return 2.i8; - // } - // "; - - // let (mut machine, funcs) = setup(input); - // let result = machine.run(funcs[0], vec![]); - - // assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i8(), 1); - // } - - // #[test] - // fn br_table() { - // let input = " - // target = \"evm-ethereum-london\" - - // func private %test() -> i64 { - // block0: - // br_table 1.i64 (0.i64 block1) (1.i64 block2); - // block1: - // return 1.i64; - // 
block2: - // return 2.i64; - // block3: - // return 3.i64; - // } - // "; - - // let (mut machine, funcs) = setup(input); - // let result = machine.run(funcs[0], vec![]); - - // assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i64(), 2); - // } - - #[test] - fn phi() { - let input = " - target = \"evm-ethereum-london\" - - func private %test() -> i8 { - block0: - br 1.i1 block1 block2; - block1: - jump block2; - block2: - v0.i8 = phi (1.i8 block0) (-1.i8 block1); - return v0; +impl State for Machine { + fn lookup_val(&mut self, value_id: ValueId) -> EvalValue { + let value = self.top_func().dfg.value(value_id); + match value { + Value::Immediate { imm, .. } => (*imm).into(), + Value::Global { .. } => { + todo!() + } + _ => self.top_frame().locals[value_id], } - "; + } - let (mut machine, funcs) = setup(input); - let result = machine.run(funcs[0], vec![]); + fn call_func(&mut self, func: FuncRef, args: Vec) -> EvalValue { + let ret_addr = self.pc; - assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i8(), -1); - } + let new_frame = Frame::new(func, &self.module, args); + self.frames.push(new_frame); - #[test] - fn gep() { - let input = " - target = \"evm-ethereum-london\" + let result = self.run_on_func(); - type @s1 = {i32, i64, i1}; + self.frames.pop(); + self.pc = ret_addr; - func private %test(v0.i256) -> *i1 { - block0: - v1.*@s1 = int_to_ptr v0 *@s1; - v2.*i1 = gep v1 0.i256 2.i256; - return v2; - } - "; + result + } - let (mut machine, funcs) = setup(input); - let arg = Immediate::zero(Type::I256); - let result = machine.run(funcs[0], vec![arg.into()]); + fn set_action(&mut self, action: Action) { + self.action = action + } - assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i64(), 12); + fn prev_block(&mut self) -> BlockId { + let frame = self.top_frame(); + frame.prev_block.unwrap() } - #[cfg(target_arch = "aarch64")] - #[test] - fn gep_ptr_ty() { - let input = " - target = \"evm-ethereum-london\" - - func private %test(v0.i256) -> **i32 { - 
block0: - v1.*[*i32; 3] = int_to_ptr v0 *[*i32; 3]; - v2.**i32 = gep v1 0.i256 2.i256; - return v2; + fn load(&mut self, addr: EvalValue, ty: Type) -> EvalValue { + if !(ty.is_integral() || ty.is_pointer(&self.module.ctx)) { + // TODO: we need to decide how to handle load of aggregate type when it fits + // into register/stack-slot size. + todo!(); + } + + let Some(addr) = addr.as_imm() else { + panic!("undef address in load") + }; + let addr = addr.as_usize(); + let size = self.module.ctx.size_of(ty); + if addr + size > self.memory.len() { + panic!("uninitialized memory access is detected"); } - "; - let (mut machine, funcs) = setup(input); - let arg = Immediate::zero(Type::I256); - let result = machine.run(funcs[0], vec![arg.into()]); + let slice = &self.memory[addr..addr + size]; + let value_i256 = match self.module.ctx.endian() { + Endian::Be => I256::from_be_bytes(slice), + Endian::Le => I256::from_le_bytes(slice), + }; - assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i64(), 64); + let imm = Immediate::from_i256(value_i256, ty); + EvalValue::Imm(imm) } - #[test] - fn gep_nested_aggr_ty() { - let input = " - target = \"evm-ethereum-london\" + fn store(&mut self, addr: EvalValue, value: EvalValue, ty: Type) -> EvalValue { + if !(ty.is_integral() || ty.is_pointer(&self.module.ctx)) { + // TODO: we need to decide how to handle store of aggregate type when it fits + // into register/stack-slot size. 
+ todo!(); + } - type @s1 = {i32, [i16; 3], [i8; 2]}; + let Some(addr) = addr.as_imm() else { + panic!("undef address in store") + }; + let addr = addr.as_usize(); + let size = self.module.ctx.size_of(ty); + if addr + size > self.memory.len() { + self.memory.resize(addr + size, 0); + } - func private %test(v0.i256) -> *i8 { - block0: - v1.*@s1 = int_to_ptr v0 *@s1; - v2.*i8 = gep v1 0.i256 2.i256 1.i256; - return v2; + let Some(value) = value.as_imm() else { + panic!("undef value in store"); + }; + + match self.module.ctx.endian() { + Endian::Be => { + let v = value.as_i256().to_u256(); + let bytes = v.to_big_endian(); + let slice = &bytes[bytes.len() - size..]; + self.memory[addr..addr + size].copy_from_slice(slice); + } + Endian::Le => { + let v = value.as_i256().to_u256(); + let bytes = v.to_little_endian(); + let slice = &bytes[..size]; + self.memory[addr..addr + size].copy_from_slice(slice); + } } - "; - let (mut machine, funcs) = setup(input); - let arg = Immediate::zero(Type::I256); - let result = machine.run(funcs[0], vec![arg.into()]); + EvalValue::Undef + } - assert_eq!(result.as_imm().unwrap().as_i256().trunc_to_i64(), 11); + fn dfg(&self) -> &DataFlowGraph { + &self.top_func().dfg } } diff --git a/crates/interpreter/test_files/arith.sntn b/crates/interpreter/test_files/arith.sntn new file mode 100644 index 0000000..cb7ebe0 --- /dev/null +++ b/crates/interpreter/test_files/arith.sntn @@ -0,0 +1,28 @@ +target = "evm-ethereum-london" + + +#[(0.i8) -> 255.i8] +#[(255.i8) -> 0.i8] +func private %not(v0.i8) -> i8 { + block0: + v1.i8 = not v0; + return v1; +} + +#[(255.i32, 250.i32) -> 505.i32] +#[(-1.i32, 1.i32) -> 0.i32] +func private %add(v0.i32, v1.i32) -> i32 { + block0: + v2.i32 = add v0 v1; + return v2; +} + + +#[(1.i32, 1.i32) -> 0.i32] +#[(-1.i32, -1.i32) -> 0.i32] +func private %sub(v0.i32, v1.i32) -> i32 { + block0: + v2.i32 = sub v0 v1; + return v2; +} + diff --git a/crates/interpreter/test_files/call.sntn b/crates/interpreter/test_files/call.sntn 
new file mode 100644 index 0000000..22163c5 --- /dev/null +++ b/crates/interpreter/test_files/call.sntn @@ -0,0 +1,17 @@ +target = "evm-ethereum-london" + + +func public %mult_by_two(v0.i8) -> i8 { + block0: + v1.i8 = mul v0 2.i8; + return v1; +} + +#[(0.i8) -> 0.i8] +#[(2.i8) -> 4.i8] +func public %call_test(v0.i8) -> i8 { + block0: + v1.i8 = call %mult_by_two v0; + return v1; +} + diff --git a/crates/interpreter/test_files/control_flow.sntn b/crates/interpreter/test_files/control_flow.sntn new file mode 100644 index 0000000..7159cc8 --- /dev/null +++ b/crates/interpreter/test_files/control_flow.sntn @@ -0,0 +1,50 @@ +target = "evm-ethereum-london" + +#[() -> 0.i1] +func private %jump() -> i1 { + block0: + jump block2; + block1: + return 1.i1; + block2: + return 0.i1; +} + +#[(1.i1) -> 1.i8] +#[(0.i1) -> 2.i8] +func private %branch(v0.i1) -> i8 { + block0: + br v0 block1 block2; + block1: + return 1.i8; + block2: + return 2.i8; +} + +#[(3.i64) -> 1.i64] +#[(10.i64) -> 2.i64] +#[(0.i64) -> 3.i64] +#[(20.i64) -> 3.i64] +func private %br_table(v0.i64) -> i64 { + block0: + br_table v0 block3 (3.i64 block1) (10.i64 block2); + block1: + return 1.i64; + block2: + return 2.i64; + block3: + return 3.i64; +} + + +#[(0.i1) -> 1.i8] +#[(1.i1) -> -1.i8] +func private %simple_phi(v0.i1) -> i8 { + block0: + br v0 block1 block2; + block1: + jump block2; + block2: + v1.i8 = phi (1.i8 block0) (-1.i8 block1); + return v1; +} diff --git a/crates/interpreter/test_files/data.sntn b/crates/interpreter/test_files/data.sntn new file mode 100644 index 0000000..1e125d5 --- /dev/null +++ b/crates/interpreter/test_files/data.sntn @@ -0,0 +1,10 @@ +target = "evm-ethereum-london" + +#[(10.i256, 255.i8) -> 255.i8] +func private %store_load(v0.i256, v1.i8) -> i8 { + block0: + v2.*i8 = int_to_ptr v0 *i8; + mstore v2 v1 i8; + v3.i8 = mload v2 i8; + return v3; +} diff --git a/crates/interpreter/test_files/gep.sntn b/crates/interpreter/test_files/gep.sntn new file mode 100644 index 0000000..3858aa8 
--- /dev/null +++ b/crates/interpreter/test_files/gep.sntn @@ -0,0 +1,34 @@ +target = "evm-ethereum-london" + +type @s1 = {i32, i64, i1}; +type @nested = {i32, [i16; 3], [i8; 2]}; + +#[(0.i256) -> 12.i256] +#[(8.i256) -> 20.i256] +func private %gep_basic(v0.i256) -> i256 { + block0: + v1.*@s1 = int_to_ptr v0 *@s1; + v2.*i1 = gep v1 0.i256 2.i256; + v3.i256 = ptr_to_int v2 i256; + return v3; +} + + +#[(0.i256) -> 64.i256] +func private %gep_ptr_ty(v0.i256) -> i256 { + block0: + v1.*[*i32; 3] = int_to_ptr v0 *[*i32; 3]; + v2.**i32 = gep v1 0.i256 2.i256; + v3.i256 = ptr_to_int v2 i256; + return v3; +} + + +#[(0.i256) -> 11.i256] +func private %gep_aggregate(v0.i256) -> i256 { + block0: + v1.*@nested = int_to_ptr v0 *@nested; + v2.*i8 = gep v1 0.i256 2.i256 1.i256; + v3.i256 = ptr_to_int v2 i256; + return v3; +} diff --git a/crates/interpreter/tests/common.rs b/crates/interpreter/tests/common.rs index cc9e6cd..9924eaf 100644 --- a/crates/interpreter/tests/common.rs +++ b/crates/interpreter/tests/common.rs @@ -2,7 +2,7 @@ use std::io::Write; use once_cell::sync::Lazy; use regex::Regex; -use sonatina_interpreter::machine::Machine; +use sonatina_interpreter::Machine; use sonatina_ir::{interpret::EvalValue, module::FuncRef, Immediate}; use sonatina_parser::{ ast::{Value, ValueKind}, @@ -18,7 +18,7 @@ pub fn parse_module(file_path: &str) -> ParsedModule { Err(errs) => { let mut v: Vec<u8> = Vec::new(); for err in errs { - err.print(&mut v, &file_path, &content, false).unwrap(); + err.print(&mut v, file_path, &content, false).unwrap(); writeln!(&mut v).unwrap(); } let err_str = String::from_utf8(v).unwrap(); @@ -90,13 +90,17 @@ impl TestCase { &format!("invalid `{comment}`, `#[(args_list) -> ret]` is expected"), )); }; - let args = caps["args"] - .split(",") - .map(|arg| { - let arg = arg.trim(); - parse_value(module, func, arg) - }) - .collect::<Result<Vec<_>, _>>()?; + let args = if !caps["args"].is_empty() { + caps["args"] + .split(",") + .map(|arg| { + let arg = arg.trim(); + 
parse_value(module, func, arg) + }) + .collect::<Result<Vec<_>, _>>()? + } else { + vec![] + }; let ret = caps .name("ret") @@ -150,8 +154,8 @@ static PATTERN: Lazy<Regex> = Lazy::new(|| { Regex::new( r"(?x) \[ - \((?P<args>[a-zA-Z0-9_.@*]+(?:,\s*[a-zA-Z0-9_.@*]+)*,?)\) - (?:\s*->\s*(?P<ret>[a-zA-Z0-9_.@*]+))? + \((?P<args>[a-zA-Z0-9_.@*-]*(?:,\s*[a-zA-Z0-9_.@*-]+)*,?)\) + (?:\s*->\s*(?P<ret>[a-zA-Z0-9_.@*-]+))? \] ", ) diff --git a/crates/interpreter/tests/test.rs b/crates/interpreter/tests/test.rs index 5b98131..95e4a7f 100644 --- a/crates/interpreter/tests/test.rs +++ b/crates/interpreter/tests/test.rs @@ -2,7 +2,7 @@ mod common; use common::parse_test_cases; use dir_test::{dir_test, Fixture}; -use sonatina_interpreter::machine::Machine; +use sonatina_interpreter::Machine; use sonatina_parser::ParsedModule; #[dir_test( @@ -10,7 +10,7 @@ use sonatina_parser::ParsedModule; glob: "*.sntn", loader: common::parse_module, )] -fn test_arith(fixture: Fixture<ParsedModule>) { +fn test(fixture: Fixture<ParsedModule>) { let parsed_module = fixture.into_content(); let test_cases = match parse_test_cases(&parsed_module) { Ok(test_cases) => test_cases,