diff --git a/WIP.md b/WIP.md index 844b6fa0..eb4c4d75 100644 --- a/WIP.md +++ b/WIP.md @@ -7,11 +7,11 @@ - today: - - execution driven frontend - record mem fetch latency - add a few more stats - - refactor events - lint + - DONE: execution driven frontend + - DONE: refactor events - DONE: add rop delay queue - tomorrow: diff --git a/examples/vectoradd.rs b/examples/vectoradd.rs index c85dd743..317c6a9a 100644 --- a/examples/vectoradd.rs +++ b/examples/vectoradd.rs @@ -89,8 +89,6 @@ where total_sum / T::from(n).unwrap() ); - // dbg!(&stats.sim); - // dbg!(box_stats.l1i_stats.reduce()) eprintln!("STATS:\n"); eprintln!("DRAM: total reads: {}", &stats.dram.total_reads()); eprintln!("DRAM: total writes: {}", &stats.dram.total_writes()); @@ -126,9 +124,6 @@ mod tests { let rmp_trace_file_path = traces_dir.join("kernel-0.msgpack"); dbg!(&rmp_trace_file_path); - // let sim = casimu::Simulation::new(); - // sim.read_trace(rmp_trace_file_path)?; - // dbg!(&sim.stats.lock().unwrap()); let mut reader = utils::fs::open_readable(rmp_trace_file_path)?; let full_trace: model::MemAccessTrace = rmp_serde::from_read(&mut reader)?; let warp_traces = full_trace.to_warp_traces(); diff --git a/src/cache/bandwidth.rs b/src/cache/bandwidth.rs index 135df501..412e60eb 100644 --- a/src/cache/bandwidth.rs +++ b/src/cache/bandwidth.rs @@ -52,9 +52,7 @@ impl BandwidthManager { super::RequestStatus::HIT_RESERVED | super::RequestStatus::MISS => { // the data array is accessed to read out the entire line for write-back // in case of sector cache we need to write bank only the modified sectors - if let Some(evicted) = super::event::was_writeback_sent(events) - .and_then(|wb| wb.evicted_block.as_ref()) - { + if let Some(evicted) = super::event::was_writeback_sent(events) { let data_cycles = evicted.modified_size / port_width; self.data_port_occupied_cycles += data_cycles as usize; log::trace!( diff --git a/src/cache/base.rs b/src/cache/base.rs index dedb0deb..6b965d63 100644 --- a/src/cache/base.rs +++ b/src/cache/base.rs @@ -231,8 +231,7 @@ impl Base { self.miss_queue.push_back(fetch.clone()); fetch.set_status(self.miss_queue_status, time); if !write_allocate { - let event = super::event::Event::new(super::event::Kind::READ_REQUEST_SENT); - events.push(event); + events.push(super::event::Event::ReadRequestSent); } should_miss = true; diff --git a/src/cache/data.rs b/src/cache/data.rs index 0d0773bb..424879a6 100644 --- a/src/cache/data.rs +++ b/src/cache/data.rs @@ -244,9 +244,8 @@ where // address from the original mf writeback_fetch.tlx_addr.chip = fetch.tlx_addr.chip; writeback_fetch.tlx_addr.sub_partition = fetch.tlx_addr.sub_partition; - let event = event::Event { - kind: event::Kind::WRITE_BACK_REQUEST_SENT, - evicted_block: None, // drop evicted? + let event = event::Event::WriteBackRequestSent { + evicted_block: None, }; log::trace!( @@ -292,10 +291,7 @@ where } // on miss, generate write through - let event = event::Event { - kind: event::Kind::WRITE_REQUEST_SENT, - evicted_block: None, - }; + let event = event::Event::WriteRequestSent; self.send_write_request(fetch, event, time, events); cache::RequestStatus::MISS } @@ -347,11 +343,7 @@ where return cache::RequestStatus::RESERVATION_FAIL; } - let event = event::Event { - kind: event::Kind::WRITE_REQUEST_SENT, - evicted_block: None, - }; - + let event = event::Event::WriteRequestSent; self.send_write_request(fetch.clone(), event, time, events); let is_write = false; @@ -390,10 +382,7 @@ where is_write_allocate, ); - events.push(event::Event { - kind: event::Kind::WRITE_ALLOCATE_SENT, - evicted_block: None, - }); + events.push(event::Event::WriteAllocateSent); if should_miss { // If evicted block is modified and not a write-through @@ -438,8 +427,7 @@ where // is used, so set the right chip address from the original mf writeback_fetch.tlx_addr.chip = fetch.tlx_addr.chip; writeback_fetch.tlx_addr.sub_partition = fetch.tlx_addr.sub_partition; - let event = event::Event { - kind: event::Kind::WRITE_BACK_REQUEST_SENT, + let event = event::Event::WriteBackRequestSent { evicted_block: Some(evicted), }; diff --git a/src/cache/event.rs b/src/cache/event.rs index 60c66653..86307d0e 100644 --- a/src/cache/event.rs +++ b/src/cache/event.rs @@ -4,52 +4,41 @@ use crate::tag_array; pub fn was_write_sent(events: &[Event]) -> bool { events .iter() - .any(|event| event.kind == Kind::WRITE_REQUEST_SENT) + .any(|event| matches!(event, Event::WriteRequestSent)) } #[must_use] -pub fn was_writeback_sent(events: &[Event]) -> Option<&Event> { +pub fn was_writeback_sent(events: &[Event]) -> Option<&tag_array::EvictedBlockInfo> { events .iter() - .find(|event| event.kind == Kind::WRITE_BACK_REQUEST_SENT) + .find_map(|event| match event { + Event::WriteBackRequestSent { evicted_block } => Some(evicted_block.as_ref()), + _ => None, + }) + .flatten() } #[must_use] pub fn was_read_sent(events: &[Event]) -> bool { events .iter() - .any(|event| event.kind == Kind::READ_REQUEST_SENT) + .any(|event| matches!(event, Event::ReadRequestSent)) } #[must_use] pub fn was_writeallocate_sent(events: &[Event]) -> bool { events .iter() - .any(|event| event.kind == Kind::WRITE_ALLOCATE_SENT) -} - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub enum Kind { - WRITE_BACK_REQUEST_SENT, - READ_REQUEST_SENT, - WRITE_REQUEST_SENT, - WRITE_ALLOCATE_SENT, + .any(|event| matches!(event, Event::WriteAllocateSent)) } #[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct Event { - pub kind: Kind, - - // if it was write_back event, fill the the evicted block info - pub evicted_block: Option, -} - -impl Event { - #[must_use] - pub fn new(kind: Kind) -> Self { - Self { - kind, - evicted_block: None, - } - } +pub enum Event { + WriteBackRequestSent { + // if it was write_back event, fill the the evicted block info + evicted_block: Option, + }, + ReadRequestSent, + WriteRequestSent, + WriteAllocateSent, } diff --git a/src/exec/mod.rs b/src/exec/mod.rs index 37e26424..bf05724a 100644 --- a/src/exec/mod.rs +++ b/src/exec/mod.rs @@ -183,7 +183,7 @@ impl Simulation { ) -> DevicePtr<'s, 'a, T> { let mut offset_lock = self.offset.lock().unwrap(); let offset = *offset_lock; - *offset_lock += size as u64; + *offset_lock += size; self.inner.lock().unwrap().gpu_mem_alloc(offset, size, None); self.inner @@ -199,48 +199,6 @@ impl Simulation { } } - // /// Read a trace. - // /// - // /// # Errors - // /// When trace cannot be read. - // pub fn read_trace

(&self, path: P) -> Result<(), TraceError> - // where - // P: AsRef, - // { - // use serde::Deserializer; - // let file = std::fs::OpenOptions::new().read(true).open(path.as_ref())?; - // let reader = std::io::BufReader::new(file); - // let mut reader = rmp_serde::Deserializer::new(reader); - // let decoder = nvbit_io::Decoder::new(|access: model::MemAccessTraceEntry| { - // // log::trace!("{:#?}", &access); - // - // // create a new warp here - // if access.instr_is_load { - // // todo: we should somehow get the size of each load - // let loads = access - // .addrs - // .into_iter() - // .filter(|addr| *addr > 0) - // .map(|addr| (addr, 4)); - // self.warp_load(loads); - // } else { - // // todo: we should somehow get the size of each store - // let stores = access - // .addrs - // .into_iter() - // .filter(|addr| *addr > 0) - // .map(|addr| (addr, 4)); - // self.warp_store(stores); - // } - // // todo: flush a thread here? is this the wrong granularity? - // // edit: i dont think so, this is one warp instruction so we can do that here - // // do not forget to call ... - // self.flush(); - // }); - // reader.deserialize_seq(decoder)?; - // Ok(()) - // } - pub fn run_to_completion(&self) -> eyre::Result { let mut inner = self.inner.lock().unwrap(); inner.run_to_completion()?; @@ -261,38 +219,14 @@ impl Simulation { G: Into, B: Into, K: Kernel, - // K::Error: Send, { let grid: model::Dim = grid.into(); let block_size: model::Dim = block_size.into(); - dbg!(&grid); - dbg!(&block_size); - - // use rayon::prelude::*; - // let test: Vec<_> = grid - // .into_iter() - // .flat_map(|block_id| { - // block_size - // .into_iter() - // .map(move |thread_id| (block_id, thread_id)) - // }) - // .collect(); - // - // test.iter().try_for_each(|(block_id, thread_id)| { - // let mut thread_idx = ThreadIndex { - // block_idx: model::Dim::from(*block_id), - // block_dim: block_size, - // thread_idx: block_size, - // }; - // - // thread_idx.thread_idx = model::Dim::from(*thread_id); - // kernel.run(&thread_idx) - // })?; let mut trace = Vec::new(); // loop over the grid - for block_id in grid.clone().into_iter() { + for block_id in grid.clone() { log::debug!("block {}", &block_id); let mut thread_id = ThreadIndex { @@ -333,7 +267,7 @@ impl Simulation { warp_size: WARP_SIZE, line_num: 0, instr_data_width: 0, - instr_opcode: "".to_string(), + instr_opcode: String::new(), instr_offset: 0, instr_idx: 0, instr_predicate: nvbit_model::Predicate::default(), @@ -370,7 +304,7 @@ impl Simulation { // ); assert_eq!(instructions.len(), WARP_SIZE as usize); - let first_valid = instructions.iter().find_map(|x| x.as_ref()); + let first_valid = instructions.iter().find_map(std::option::Option::as_ref); if let Some(WarpInstruction::Access(access)) = first_valid { let accesses: Vec<_> = instructions @@ -404,7 +338,7 @@ impl Simulation { MemorySpace::Shared if is_store => "STS".to_string(), // MemorySpace::Texture if is_store => "LDG".to_string(), MemorySpace::Constant if is_store => panic!("constant store"), - other => panic!("unknown memory space {:?}", other), + other => panic!("unknown memory space {other:?}"), }; trace.push(model::MemAccessTraceEntry { @@ -428,19 +362,6 @@ impl Simulation { ..warp_instruction.clone() }); - // add warp instructions - // for instruction in warp_instructions.into_iter().enumerate() { - // trace.push(match instruction { - // WarpInstruction::Access(access) => model::MemAccessTraceEntry { - // instr_idx: ..warp_instruction, - // }, - // }); - // } - - // for instructions in warp_instructions.iter_mut() { - // instructions.clear(); - // } - // log::info!("END WARP #{} ({:?})", &warp_id_in_block, &thread_id); } } @@ -455,8 +376,8 @@ impl Simulation { .collect::>()); let launch_config = model::KernelLaunch { - name: "".to_string(), - trace_file: "".to_string(), + name: String::new(), + trace_file: String::new(), id: self.kernel_id.fetch_add(1, atomic::Ordering::SeqCst), grid, block: block_size, diff --git a/src/kernel.rs b/src/kernel.rs index 8d7af550..1c4dfe7c 100644 --- a/src/kernel.rs +++ b/src/kernel.rs @@ -6,7 +6,7 @@ use color_eyre::{ use std::collections::HashSet; use std::path::Path; use std::sync::{Mutex, RwLock}; -use std::time::Instant; + use trace_model as model; pub fn read_trace(path: impl AsRef) -> eyre::Result { @@ -52,7 +52,7 @@ impl std::fmt::Display for Kernel { } impl Kernel { - pub fn new(config: model::KernelLaunch, trace: model::MemAccessTrace) -> Self { + #[must_use] pub fn new(config: model::KernelLaunch, trace: model::MemAccessTrace) -> Self { // sanity check assert!(trace.is_valid()); diff --git a/src/testing/exec.rs b/src/testing/exec.rs index 85c9de89..6462f627 100644 --- a/src/testing/exec.rs +++ b/src/testing/exec.rs @@ -1,6 +1,6 @@ use crate::exec; use color_eyre::eyre; -use num_traits::{Float, NumCast, Zero}; +use num_traits::{Float, Zero}; #[test] fn vectoradd() -> eyre::Result<()> {