From 06ac34023fc70cc9162427446067819364fcc207 Mon Sep 17 00:00:00 2001 From: romnnn Date: Thu, 7 Sep 2023 12:31:19 +0200 Subject: [PATCH] cleanup --- WIP.md | 5 +- playground/sys/src/ref/l2_cache_config.cc | 2 +- src/cache/base.rs | 68 +------------------- src/cache/data.rs | 75 +++-------------------- src/cache/l2.rs | 53 ++-------------- 5 files changed, 18 insertions(+), 185 deletions(-) diff --git a/WIP.md b/WIP.md index ddc79047..c7d23637 100644 --- a/WIP.md +++ b/WIP.md @@ -2,6 +2,7 @@ - today: + - use gpu_mem_alloc for the allocations but still allow smart comparision with play whose traces does not include allocations - xtask task for converting traces? or do that in the validation component - validate the number of warp instructions for the execution driven frontend and test that. @@ -19,13 +20,13 @@ - per allocation stats - - fix that cache index unwrapping design - add config parsing for box - generate data for the different parallel implementations speedups - how well does it scale for 20 instead of 80 cores - convert, match and plot statistics - record mem fetch latency in playground and box + - DONE: fix that cache index unwrapping design - DONE: playground stats (should match accelsim) - DONE: builder for mem access as well - DONE: consolidate deterministic parallelism, make rayon optional finally @@ -67,8 +68,6 @@ - todos - - use gpu_mem_alloc for the allocations but still allow smart comparision with play whose traces does not include allocations - - refactor - look into: // TODO HOTFIX: workaround diff --git a/playground/sys/src/ref/l2_cache_config.cc b/playground/sys/src/ref/l2_cache_config.cc index 2fe9e042..3a9faf9d 100644 --- a/playground/sys/src/ref/l2_cache_config.cc +++ b/playground/sys/src/ref/l2_cache_config.cc @@ -11,7 +11,7 @@ unsigned l2_cache_config::set_index(new_addr_type addr) const { if (m_address_mapping) { // Calculate set index without memory partition bits to reduce set camping part_addr = m_address_mapping->partition_address(addr); - fmt::println("partition address for addr {} is {}", addr, part_addr); + // fmt::println("partition address for addr {} is {}", addr, part_addr); } return cache_config::set_index(part_addr); diff --git a/src/cache/base.rs b/src/cache/base.rs index 92dc2263..e47d5a83 100644 --- a/src/cache/base.rs +++ b/src/cache/base.rs @@ -30,37 +30,24 @@ impl PendingRequest {} /// Implements common functions for `read_only_cache` and `data_cache` /// Each subclass implements its own 'access' function #[derive()] -// pub struct Base { -// pub struct Base { pub struct Base { pub name: String, pub core_id: usize, pub cluster_id: usize, pub stats: Arc>, - // pub config: Arc, - // pub cache_config: Arc, pub cache_controller: CC, - // pub cache_controller: tag_array::Pascal, - // pub mem_controller: Box, - // pub phantom: std::marker::PhantomData, - // pub mem_controller: MC, pub cache_config: cache::Config, pub miss_queue: VecDeque, pub miss_queue_status: mem_fetch::Status, pub mshrs: mshr::Table, pub tag_array: tag_array::TagArray, - // pub tag_array: tag_array::TagArray, pending: HashMap, top_port: Option>, - // mem_port: Arc, - // Arc>>, pub bandwidth: super::bandwidth::Manager, } -// impl std::fmt::Debug for Base { -// impl std::fmt::Debug for Base { impl std::fmt::Debug for Base { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.debug_struct("Base") @@ -72,48 +59,21 @@ impl std::fmt::Debug for Base { } } -// pub struct CacheConfig { -// // todo: maybe make that a fifo queue -// pub miss_queue_size: usize, -// } - -// #[derive(Debug)] -// pub struct RemoveMe {} -// -// impl mcu::MemoryController for RemoveMe { -// fn memory_partition_address(&self, addr: address) -> address { -// 0 -// } -// fn to_physical_address(&self, addr: address) -> mcu::TranslatedAddress { -// mcu::TranslatedAddress::default() -// } -// fn num_memory_partitions(&self) -> usize { -// 0 -// } -// fn num_memory_sub_partitions(&self) -> usize { -// 0 -// } -// } - #[derive(Debug, Clone)] -// pub struct Builder { pub struct Builder { pub name: String, pub core_id: usize, pub cluster_id: usize, pub stats: Arc>, pub cache_controller: CC, - // pub mem_controller: MC, pub cache_config: Arc, } -// impl Builder { impl Builder where CC: Clone, { #[must_use] - // pub fn build(self) -> Base { pub fn build(self) -> Base { let cache_config = self.cache_config; let tag_array = @@ -128,7 +88,6 @@ where let bandwidth = super::bandwidth::Manager::new(cache_config.clone()); let cache_config = cache::Config::from(&*cache_config); - // let cache_controller = tag_array::Pascal::new(cache_config.clone()); let miss_queue = VecDeque::with_capacity(cache_config.miss_queue_size); @@ -140,12 +99,7 @@ where mshrs, top_port: None, stats: self.stats, - // config, - // mem_controller: Box::new(ReplaceMe {}), cache_config, - // addr_translation, - // phantom: std::marker::PhantomData, - // mem_controller: self.mem_controller, cache_controller: self.cache_controller, bandwidth, pending: HashMap::new(), @@ -155,7 +109,6 @@ where } } -// impl Base impl Base where CC: CacheAddressTranslation, @@ -237,7 +190,6 @@ where // change address to mshr block address fetch.access.req_size_bytes = self.cache_config.atom_size; - // fetch.data_size = self.cache_config.atom_size(); fetch.access.addr = mshr_addr; self.mshrs.add(mshr_addr, fetch.clone()); @@ -274,13 +226,7 @@ where } } -// impl crate::engine::cycle::Component for Base -// impl crate::engine::cycle::Component for Base -// impl crate::engine::cycle::Component for Base -impl crate::engine::cycle::Component for Base -// where -// I: ic::MemFetchInterface, -{ +impl crate::engine::cycle::Component for Base { /// Sends next request to top memory in the memory hierarchy. fn cycle(&mut self, cycle: u64) { let Some(ref top_level_memory_port) = self.top_port else { @@ -306,7 +252,6 @@ impl crate::engine::cycle::Component for Base } else { fetch.control_size() }; - // if top_level_memory_port.full(fetch.size(), fetch.is_write()) { if top_level_memory_port.can_send(&[packet_size]) { let fetch = self.miss_queue.pop_front().unwrap(); log::debug!( @@ -329,13 +274,7 @@ impl crate::engine::cycle::Component for Base } } -// impl Base -// impl Base -impl Base -// impl Base -// where -// I: ic::MemFetchInterface, -{ +impl Base { /// Checks whether this request can be handled in this cycle. /// /// `n` equals the number of misses to be handled in this cycle. @@ -465,9 +404,6 @@ where } } -// impl super::Bandwidth for Base { -// impl super::Bandwidth for Base { -// impl super::Bandwidth for Base { impl super::Bandwidth for Base { fn has_free_data_port(&self) -> bool { self.bandwidth.has_free_data_port() diff --git a/src/cache/data.rs b/src/cache/data.rs index 1a02bf74..af6f1854 100644 --- a/src/cache/data.rs +++ b/src/cache/data.rs @@ -28,20 +28,12 @@ pub struct Builder { /// at the granularity of individual blocks. /// (the policy used in fermi according to the CUDA manual) #[derive(Debug)] -// pub struct Data { pub struct Data { - // pub inner: cache::base::Base, - // pub inner: cache::base::Base, - // pub inner: cache::base::Base, - // pub inner: cache::base::Base, pub inner: cache::base::Base, /// Memory controller pub mem_controller: MC, - /// Cache controller - // pub cache_controller: CC, - /// Specifies type of write allocate request (e.g., L1 or L2) write_alloc_type: AccessKind, /// Specifies type of writeback request (e.g., L1 or L2) @@ -50,33 +42,14 @@ pub struct Data { impl Builder where - CC: Clone, // where - // MC: Clone, - // impl Data - // impl Data - // where - // I: ic::MemFetchInterface, - // I: Arc>>, + CC: Clone, { - pub fn build( - self, // name: String, - // core_id: usize, - // cluster_id: usize, - // // mem_port: Arc>>, - // stats: Arc>, - // config: Arc, - // cache_config: Arc, - // write_alloc_type: AccessKind, - // write_back_type: AccessKind, - ) -> Data { - // mem_controller: mcu::MemoryControllerUnit::new(&*config).unwrap(), - // let cache_controller = tag_array::Pascal::new((&*self.cache_config).into()); + pub fn build(self) -> Data { let inner = super::base::Builder { name: self.name, core_id: self.core_id, cluster_id: self.cluster_id, stats: self.stats, - // mem_controller: self.mem_controller.clone(), cache_controller: self.cache_controller, cache_config: self.cache_config, } @@ -97,22 +70,11 @@ impl Data { } } -// impl Data { impl Data where MC: MemoryController, CC: CacheAddressTranslation, { - // #[inline] - // pub fn set_top_port(&mut self, port: ic::Port) { - // self.inner.set_top_port(port); - // } - - // #[must_use] - // pub fn cache_config(&self) -> &Arc { - // &self.inner.cache_config - // } - /// Write-back hit: mark block as modified. fn write_hit_write_back( &mut self, @@ -499,7 +461,6 @@ where sector_mask: evicted.sector_mask, } .build(); - // let control_size = writeback_access.control_size(); // the evicted block may have wrong chip id when advanced L2 hashing // is used, so set the right chip address from the original mf @@ -511,15 +472,12 @@ where tlx_addr.sub_partition = fetch.tlx_addr.sub_partition; let partition_addr = self - // .inner .mem_controller .memory_partition_address(writeback_access.addr); let writeback_fetch = mem_fetch::Builder { instr: None, access: writeback_access, - // &self.inner.config, - // control_size, warp_id: 0, core_id: 0, cluster_id: 0, @@ -552,10 +510,7 @@ where ) -> cache::RequestStatus { use cache::config::WriteAllocatePolicy; let func = match self.inner.cache_config.write_allocate_policy { - WriteAllocatePolicy::NO_WRITE_ALLOCATE => { - // unimplemented!("no write allocate"); - Self::write_miss_no_write_allocate - } + WriteAllocatePolicy::NO_WRITE_ALLOCATE => Self::write_miss_no_write_allocate, WriteAllocatePolicy::WRITE_ALLOCATE => Self::write_miss_write_allocate_naive, WriteAllocatePolicy::FETCH_ON_WRITE => { // Self::write_miss_write_allocate_fetch_on_write @@ -736,11 +691,7 @@ where } } -impl crate::engine::cycle::Component for Data -// impl crate::engine::cycle::Component for Data -// where -// I: ic::MemFetchInterface, -{ +impl crate::engine::cycle::Component for Data { fn cycle(&mut self, cycle: u64) { self.inner.cycle(cycle); } @@ -748,11 +699,8 @@ impl crate::engine::cycle::Component for Data impl cache::Cache for Data where - CC: CacheAddressTranslation, // impl cache::Cache for Data - MC: MemoryController, // impl cache::Cache for Data - // impl cache::Cache for Data - // where - // I: ic::MemFetchInterface + 'static, + CC: CacheAddressTranslation, + MC: MemoryController, { fn as_any(&self) -> &dyn std::any::Any { self @@ -790,7 +738,6 @@ where let dbg_fetch = fetch.clone(); - // let (cache_index, probe_status) = self let probe = self .inner .tag_array @@ -800,11 +747,7 @@ where .unwrap_or(cache::RequestStatus::RESERVATION_FAIL); // dbg!((cache_index, probe_status)); - let access_status = self.process_tag_probe( - is_write, probe, // probe_status, - // cache_index, - addr, fetch, events, time, - ); + let access_status = self.process_tag_probe(is_write, probe, addr, fetch, events, time); // dbg!(&access_status); log::debug!( @@ -824,7 +767,7 @@ where cache::RequestStatus::SECTOR_MISS if access_status != cache::RequestStatus::MISS => { probe_status } - _status => access_status, + _ => access_status, }; let mut stats = self.inner.stats.lock(); stats.inc( @@ -871,8 +814,6 @@ where } } -// impl cache::Bandwidth for Data { -// impl cache::Bandwidth for Data { impl cache::Bandwidth for Data { fn has_free_data_port(&self) -> bool { self.inner.has_free_data_port() diff --git a/src/cache/l2.rs b/src/cache/l2.rs index ff9afead..81ced1ea 100644 --- a/src/cache/l2.rs +++ b/src/cache/l2.rs @@ -9,23 +9,10 @@ where MC: std::fmt::Debug, CC: std::fmt::Debug, { - // set_index_function: crate::set_index::linear::SetIndex, - // config: cache::Config, memory_controller: MC, cache_controller: CC, - // inner: crate::tag_array::Pascal, - // inner: crate::tag_array::Pascal, } -// impl Pascal { -// pub fn new(config: cache::Config) -> Self { -// Self { -// config, -// set_index_function: crate::set_index::linear::SetIndex::default(), -// } -// } -// } - impl crate::tag_array::CacheAddressTranslation for L2CacheController where MC: mcu::MemoryController, @@ -33,28 +20,23 @@ where { #[inline] fn tag(&self, addr: address) -> address { - // todo!(); self.cache_controller.tag(addr) } #[inline] fn block_addr(&self, addr: address) -> address { - // todo!(); self.cache_controller.block_addr(addr) } #[inline] fn set_index(&self, addr: address) -> u64 { - // todo!(); - // let partition_addr = addr; let partition_addr = self.memory_controller.memory_partition_address(addr); - println!("partition address for addr {} is {}", addr, partition_addr); + // println!("partition address for addr {} is {}", addr, partition_addr); self.cache_controller.set_index(partition_addr) } #[inline] fn mshr_addr(&self, addr: address) -> address { - // todo!(); self.cache_controller.mshr_addr(addr) } } @@ -62,32 +44,23 @@ where /// Generic data cache. #[derive(Debug)] #[allow(clippy::module_name_repetitions)] -// pub struct DataL2 { pub struct DataL2 { pub inner: super::data::Data< mcu::MemoryControllerUnit, L2CacheController, >, - // pub inner: super::data::Data, pub cache_config: Arc, } -impl DataL2 -// impl DataL2 -// where -// // I: ic::MemFetchInterface, -// I: crate::fifo::Queue, -{ +impl DataL2 { pub fn new( name: String, core_id: usize, cluster_id: usize, - // fetch_interconn: Arc, stats: Arc>, config: Arc, cache_config: Arc, ) -> Self { - // let memory_controller = config.address_mapping().clone(); let mem_controller = mcu::MemoryControllerUnit::new(&*config).unwrap(); let cache_controller = L2CacheController { memory_controller: mem_controller.clone(), @@ -108,10 +81,6 @@ impl DataL2 write_back_type: AccessKind::L2_WRBK_ACC, } .build(); - // TODO: crate a builder for data cache and base cache - // inner.inner.addr_translation = L2CacheController { - // // inner: mmeory_controller, - // }; Self { inner, cache_config, @@ -124,21 +93,13 @@ impl DataL2 } } -impl crate::engine::cycle::Component for DataL2 -// impl crate::engine::cycle::Component for DataL2 -// where -// I: ic::MemFetchInterface, -{ +impl crate::engine::cycle::Component for DataL2 { fn cycle(&mut self, cycle: u64) { self.inner.cycle(cycle); } } -impl super::Cache for DataL2 -// impl super::Cache for DataL2 -// where -// I: ic::MemFetchInterface + 'static, -{ +impl super::Cache for DataL2 { #[inline] fn as_any(&self) -> &dyn std::any::Any { self @@ -219,11 +180,7 @@ impl super::Cache for DataL2 } } -impl super::Bandwidth for DataL2 -// impl super::Bandwidth for DataL2 -// where -// I: ic::MemFetchInterface, -{ +impl super::Bandwidth for DataL2 { fn has_free_data_port(&self) -> bool { self.inner.has_free_data_port() }