diff --git a/difftest/online_drive/src/dpi.rs b/difftest/online_drive/src/dpi.rs
index ca4525c80..fb360386b 100644
--- a/difftest/online_drive/src/dpi.rs
+++ b/difftest/online_drive/src/dpi.rs
@@ -22,28 +22,23 @@ pub(crate) struct AxiReadPayload {
   pub(crate) data: Vec<u8>,
 }
 
-fn write_to_pointer(dst: *mut u8, data: &Vec<u8>, n: usize) {
-  unsafe {
-    for i in 0..n {
-      ptr::write(dst.add(i), data[i]);
-    }
-  }
+unsafe fn write_to_pointer(dst: *mut u8, data: &[u8]) {
+  let dst = std::slice::from_raw_parts_mut(dst, data.len());
+  dst.copy_from_slice(data);
 }
 
 unsafe fn fill_axi_read_payload(dst: *mut SvBitVecVal, dlen: u32, payload: &AxiReadPayload) {
   let data_len = 256 * (dlen / 8) as usize;
   assert!(payload.data.len() <= data_len);
-  let dst = dst as *mut u8;
-  write_to_pointer(dst, &payload.data, payload.data.len());
+  write_to_pointer(dst as *mut u8, &payload.data);
 }
 
 // Return (strobe in bit, data in byte)
-unsafe fn load_from_payload(
-  payload: &*const SvBitVecVal,
-  aw_size: c_longlong,
+unsafe fn load_from_payload<'a>(
+  payload: *const SvBitVecVal,
   data_width: u32,
-) -> (Vec<bool>, &[u8]) {
-  let src = *payload as *mut u8;
+) -> (Vec<bool>, &'a [u8]) {
+  let src = payload as *mut u8;
   let data_width_in_byte = (data_width / 8) as usize;
   let strb_width_in_byte = data_width_in_byte.div_ceil(8); // ceil divide by 8 to get byte width
   let payload_size_in_byte = strb_width_in_byte + data_width_in_byte; // data width in byte
@@ -132,7 +127,7 @@ unsafe extern "C" fn axi_write_highBandwidthPort(
   );
   let mut driver = DPI_TARGET.lock().unwrap();
   let driver = driver.as_mut().unwrap();
-  let (strobe, data) = load_from_payload(&payload, awsize, driver.dlen);
+  let (strobe, data) = load_from_payload(payload, driver.dlen);
   driver.axi_write_high_bandwidth(awaddr as u32, awsize as u64, &strobe, data);
 }
 
@@ -216,7 +211,7 @@ unsafe extern "C" fn axi_write_indexedAccessPort(
   );
   let mut driver = DPI_TARGET.lock().unwrap();
   let driver = driver.as_mut().unwrap();
-  let (strobe, data) = load_from_payload(&payload, awsize, 32);
+  let (strobe, data) = load_from_payload(payload, 32);
   driver.axi_write_indexed_access_port(awaddr as u32, awsize as u64, &strobe, data);
 }
 
diff --git a/difftest/online_drive/src/drive.rs b/difftest/online_drive/src/drive.rs
index e73637a3c..48644a548 100644
--- a/difftest/online_drive/src/drive.rs
+++ b/difftest/online_drive/src/drive.rs
@@ -8,6 +8,91 @@ use tracing::{debug, error, info, trace};
 use crate::dpi::*;
 use crate::OfflineArgs;
 
+struct ShadowMem {
+  mem: Vec<u8>,
+}
+
+impl ShadowMem {
+  pub fn new() -> Self {
+    Self { mem: vec![0; MEM_SIZE] }
+  }
+  pub fn apply_writes(&mut self, records: &MemAccessRecord) {
+    for (&addr, record) in &records.all_writes {
+      if let Some(write) = record.writes.last() {
+        self.mem[addr as usize] = write.val;
+      }
+    }
+  }
+
+  pub fn read_mem(&self, addr: u32, size: u32) -> &[u8] {
+    let start = addr as usize;
+    let end = (addr + size) as usize;
+    &self.mem[start..end]
+  }
+
+  // size: 1 << arsize
+  // bus_size: AXI bus width in bytes
+  // return: Vec<u8> with len=bus_size
+  // if size < bus_size, the result is padded due to AXI narrow transfer rules
+  pub fn read_mem_axi(&self, addr: u32, size: u32, bus_size: u32) -> Vec<u8> {
+    assert!(
+      addr % size == 0 && bus_size % size == 0,
+      "unaligned access addr={addr:#x} size={size}B dlen={bus_size}B"
+    );
+
+    let data = self.read_mem(addr, size);
+    if size < bus_size {
+      // narrow
+      let mut data_padded = vec![0; bus_size as usize];
+      let start = (addr % bus_size) as usize;
+      let end = start + data.len();
+      data_padded[start..end].copy_from_slice(data);
+
+      data_padded
+    } else {
+      // normal
+      data.to_vec()
+    }
+  }
+
+  // size: 1 << awsize
+  // bus_size: AXI bus width in bytes
+  // masks: write strokes, len=bus_size
+  // data: write data, len=bus_size
+  pub fn write_mem_axi(
+    &mut self,
+    addr: u32,
+    size: u32,
+    bus_size: u32,
+    masks: &[bool],
+    data: &[u8],
+  ) {
+    assert!(
+      addr % size == 0 && bus_size % size == 0,
+      "unaligned write access addr={addr:#x} size={size}B dlen={bus_size}B"
+    );
+
+    // handle strb=0 AXI payload
+    if !masks.iter().any(|&x| x) {
+      trace!("Mask 0 write detect");
+      return;
+    }
+
+    // TODO: we do not check strobe is compatible with (addr, awsize)
+    let addr_align = addr & ((!bus_size) + 1);
+
+    let bus_size = bus_size as usize;
+    assert_eq!(bus_size, masks.len());
+    assert_eq!(bus_size, data.len());
+
+    for i in 0..bus_size {
+      if masks[i] {
+        self.mem[addr_align as usize + i] = data[i];
+      }
+    }
+  }
+}
+
 pub(crate) struct Driver {
   spike_runner: SpikeRunner,
 
@@ -29,7 +114,7 @@ pub(crate) struct Driver {
   issued: u64,
   vector_lsu_count: u8,
 
-  shadow_mem: Vec<u8>,
+  shadow_mem: ShadowMem,
 }
 
 #[cfg(feature = "trace")]
@@ -87,70 +172,17 @@ impl Driver {
       issued: 0,
       vector_lsu_count: 0,
 
-      shadow_mem: vec![0; MEM_SIZE],
+      shadow_mem: ShadowMem::new(),
     };
 
     self_.spike_runner.load_elf(&args.common_args.elf_file).unwrap();
 
-    load_elf_to_buffer(&mut self_.shadow_mem, &args.common_args.elf_file).unwrap();
+    load_elf_to_buffer(&mut self_.shadow_mem.mem, &args.common_args.elf_file).unwrap();
     self_
   }
 
-  fn apply_to_shadow_mem(&mut self, record: &MemAccessRecord) {
-    for (addr, record) in &record.all_writes {
-      for write in &record.writes {
-        self.shadow_mem[*addr as usize] = write.val;
-      }
-    }
-  }
-
-  fn read_mem(&mut self, addr: u32, size: u32, alignment_bytes: u32) -> Vec<u8> {
-    assert!(
-      addr % size == 0 || addr % alignment_bytes == 0,
-      "unaligned access addr={addr:#x} size={size}bytes dlen={alignment_bytes}bytes"
-    );
-    let residue_addr = addr % alignment_bytes;
-    let aligned_addr = addr - residue_addr;
-    if size < alignment_bytes {
-      // narrow
-      (0..alignment_bytes)
-        .map(|i| {
-          let i_addr = aligned_addr + i;
-          if addr <= i_addr && i_addr < addr + size {
-            self.shadow_mem[i_addr as usize]
-          } else {
-            0
-          }
-        })
-        .collect()
-    } else {
-      // normal
-      (0..size).map(|i| self.shadow_mem[(addr + i) as usize]).collect()
-    }
-  }
-
-  fn write_mem(&mut self, addr: u32, alignment_bytes: u32, masks: &[bool], data: &[u8]) {
-    // handle strb=0 AXI payload
-    if !masks.iter().any(|&x| x) {
-      trace!("Mask 0 write detect");
-      return;
-    }
-
-    let size = data.len() as u32;
-    debug!("write mem: size={size}, addr={addr:#x}");
-
-    assert!(
-      addr % size == 0 || addr % alignment_bytes == 0,
-      "unaligned write access addr={addr:#x} size={size}bytes dlen={alignment_bytes}bytes"
-    );
-
-    masks.iter().enumerate().filter(|(_, &m)| m).for_each(|(i, _)| {
-      self.shadow_mem[addr as usize + i] = data[i];
-    });
-  }
-
   pub(crate) fn axi_read_high_bandwidth(&mut self, addr: u32, arsize: u64) -> AxiReadPayload {
     let size = 1 << arsize;
-    let data = self.read_mem(addr, size, self.dlen / 8);
+    let data = self.shadow_mem.read_mem_axi(addr, size, self.dlen / 8);
     let data_hex = hex::encode(&data);
     trace!(
       "[{}] axi_read_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})",
@@ -168,7 +200,7 @@ impl Driver {
   ) {
     let size = 1 << awsize;
 
-    self.write_mem(addr, self.dlen / 8, &strobe, data);
+    self.shadow_mem.write_mem_axi(addr, size, self.dlen / 8, &strobe, data);
     let data_hex = hex::encode(data);
     trace!(
       "[{}] axi_write_high_bandwidth (addr={addr:#x}, size={size}, data={data_hex})",
@@ -179,7 +211,7 @@ impl Driver {
   pub(crate) fn axi_read_indexed(&mut self, addr: u32, arsize: u64) -> AxiReadPayload {
     let size = 1 << arsize;
     assert!(size <= 4);
-    let data = self.read_mem(addr, size, 4);
+    let data = self.shadow_mem.read_mem_axi(addr, size, 4);
     let data_hex = hex::encode(&data);
     trace!(
       "[{}] axi_read_indexed (addr={addr:#x}, size={size}, data={data_hex})",
@@ -196,7 +228,7 @@ impl Driver {
     data: &[u8],
   ) {
     let size = 1 << awsize;
-    self.write_mem(addr, 4, strobe, data);
+    self.shadow_mem.write_mem_axi(addr, size, 4, strobe, data);
     let data_hex = hex::encode(data);
     trace!(
       "[{}] axi_write_indexed_access_port (addr={addr:#x}, size={size}, data={data_hex})",
@@ -296,7 +328,7 @@ impl Driver {
     );
     if self.vector_lsu_count == 0 {
       // issue scalar load / store
-      self.apply_to_shadow_mem(&se.mem_access_record);
+      self.shadow_mem.apply_writes(&se.mem_access_record);
       self.spike_runner.commit_queue.pop_front();
       continue;
     } else {
@@ -333,10 +365,8 @@ impl Driver {
   pub(crate) fn retire_instruction(&mut self, _: &Retire) {
     let se = self.spike_runner.commit_queue.back().unwrap();
 
-    // we make a copy of mem_access_record to circumvent the borrow checker
     // todo: filter all vector instruction.
-    let mem_access_record = se.mem_access_record.to_owned();
-    self.apply_to_shadow_mem(&mem_access_record);
+    self.shadow_mem.apply_writes(&se.mem_access_record);
 
     self.spike_runner.commit_queue.pop_back();
     self.last_commit_cycle = get_t();
diff --git a/difftest/spike_rs/src/util.rs b/difftest/spike_rs/src/util.rs
index b7fbc5c42..6ded0eec5 100644
--- a/difftest/spike_rs/src/util.rs
+++ b/difftest/spike_rs/src/util.rs
@@ -33,7 +33,7 @@ pub fn load_elf(spike: &mut Spike, fname: &Path) -> anyhow::Result<u64> {
 }
 
 // todo: unify load_elf and load_elf_to_buffer
-pub fn load_elf_to_buffer(mem: &mut Vec<u8>, fname: &Path) -> anyhow::Result<u64> {
+pub fn load_elf_to_buffer(mem: &mut [u8], fname: &Path) -> anyhow::Result<u64> {
   let mut file = File::open(fname).unwrap();
   let mut buffer = Vec::new();
   file.read_to_end(&mut buffer).unwrap();