Commit
cleanup
romnn committed Sep 7, 2023
1 parent a176124 commit 06ac340
Showing 5 changed files with 18 additions and 185 deletions.
5 changes: 2 additions & 3 deletions WIP.md
@@ -2,6 +2,7 @@

- today:

- use gpu_mem_alloc for the allocations but still allow smart comparison with play, whose traces do not include allocations
- xtask task for converting traces? or do that in the validation component

- validate the number of warp instructions for the execution driven frontend and test that.
@@ -19,13 +20,13 @@

- per allocation stats

- fix that cache index unwrapping design
- add config parsing for box
- generate data for the different parallel implementations speedups
- how well does it scale for 20 instead of 80 cores
- convert, match and plot statistics
- record mem fetch latency in playground and box

- DONE: fix that cache index unwrapping design
- DONE: playground stats (should match accelsim)
- DONE: builder for mem access as well
- DONE: consolidate deterministic parallelism, make rayon optional finally
@@ -67,8 +68,6 @@

- todos

- use gpu_mem_alloc for the allocations but still allow smart comparison with play, whose traces do not include allocations

- refactor

- look into: // TODO HOTFIX: workaround
2 changes: 1 addition & 1 deletion playground/sys/src/ref/l2_cache_config.cc
@@ -11,7 +11,7 @@ unsigned l2_cache_config::set_index(new_addr_type addr) const {
if (m_address_mapping) {
// Calculate set index without memory partition bits to reduce set camping
part_addr = m_address_mapping->partition_address(addr);
fmt::println("partition address for addr {} is {}", addr, part_addr);
// fmt::println("partition address for addr {} is {}", addr, part_addr);
}

return cache_config::set_index(part_addr);
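For context on the code above: partition_address strips the memory-partition selection bits out of the address before set indexing, so that addresses striped across partitions do not all camp in the same L2 sets. A minimal Rust sketch of that idea; the bit positions and helper names here are illustrative assumptions, not the simulator's actual address mapping.

// Sketch only: drop assumed partition-select bits before set indexing.
fn partition_address(addr: u64) -> u64 {
    const PART_LO: u32 = 8;   // assumed lowest partition-select bit
    const PART_BITS: u32 = 2; // assumed number of partition-select bits
    let low = addr & ((1u64 << PART_LO) - 1);
    let high = (addr >> (PART_LO + PART_BITS)) << PART_LO;
    high | low
}

fn set_index(part_addr: u64, line_size: u64, num_sets: u64) -> u64 {
    (part_addr / line_size) % num_sets
}

fn main() {
    let addr: u64 = 0x8010_2340;
    let part_addr = partition_address(addr);
    println!("set index for {addr:#x}: {}", set_index(part_addr, 128, 64));
}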
68 changes: 2 additions & 66 deletions src/cache/base.rs
@@ -30,37 +30,24 @@ impl PendingRequest {}
/// Implements common functions for `read_only_cache` and `data_cache`
/// Each subclass implements its own 'access' function
#[derive()]
// pub struct Base {
// pub struct Base<MC, CC> {
pub struct Base<CC> {
pub name: String,
pub core_id: usize,
pub cluster_id: usize,

pub stats: Arc<Mutex<stats::Cache>>,
// pub config: Arc<config::GPU>,
// pub cache_config: Arc<config::Cache>,
pub cache_controller: CC,
// pub cache_controller: tag_array::Pascal,
// pub mem_controller: Box<dyn mcu::MemoryController>,
// pub phantom: std::marker::PhantomData<MC>,
// pub mem_controller: MC,
pub cache_config: cache::Config,

pub miss_queue: VecDeque<mem_fetch::MemFetch>,
pub miss_queue_status: mem_fetch::Status,
pub mshrs: mshr::Table<mem_fetch::MemFetch>,
pub tag_array: tag_array::TagArray<cache::block::Line, CC>,
// pub tag_array: tag_array::TagArray<cache::block::Line, tag_array::Pascal>,
pending: HashMap<mem_fetch::MemFetch, PendingRequest>,
top_port: Option<ic::Port<mem_fetch::MemFetch>>,
// mem_port: Arc<I>,
// Arc<Mutex<crate::fifo::Fifo<mem_fetch::MemFetch>>>,
pub bandwidth: super::bandwidth::Manager,
}

// impl std::fmt::Debug for Base {
// impl<MC, CC> std::fmt::Debug for Base<MC, CC> {
impl<CC> std::fmt::Debug for Base<CC> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("Base")
@@ -72,48 +59,21 @@ impl<CC> std::fmt::Debug for Base<CC> {
}
}

// pub struct CacheConfig {
// // todo: maybe make that a fifo queue
// pub miss_queue_size: usize,
// }

// #[derive(Debug)]
// pub struct RemoveMe {}
//
// impl mcu::MemoryController for RemoveMe {
// fn memory_partition_address(&self, addr: address) -> address {
// 0
// }
// fn to_physical_address(&self, addr: address) -> mcu::TranslatedAddress {
// mcu::TranslatedAddress::default()
// }
// fn num_memory_partitions(&self) -> usize {
// 0
// }
// fn num_memory_sub_partitions(&self) -> usize {
// 0
// }
// }

#[derive(Debug, Clone)]
// pub struct Builder<MC, CC> {
pub struct Builder<CC> {
pub name: String,
pub core_id: usize,
pub cluster_id: usize,
pub stats: Arc<Mutex<stats::Cache>>,
pub cache_controller: CC,
// pub mem_controller: MC,
pub cache_config: Arc<config::Cache>,
}

// impl<MC, CC> Builder<MC, CC> {
impl<CC> Builder<CC>
where
CC: Clone,
{
#[must_use]
// pub fn build(self) -> Base<MC, CC> {
pub fn build(self) -> Base<CC> {
let cache_config = self.cache_config;
let tag_array =
@@ -128,7 +88,6 @@ where
let bandwidth = super::bandwidth::Manager::new(cache_config.clone());

let cache_config = cache::Config::from(&*cache_config);
// let cache_controller = tag_array::Pascal::new(cache_config.clone());

let miss_queue = VecDeque::with_capacity(cache_config.miss_queue_size);

@@ -140,12 +99,7 @@
mshrs,
top_port: None,
stats: self.stats,
// config,
// mem_controller: Box::new(ReplaceMe {}),
cache_config,
// addr_translation,
// phantom: std::marker::PhantomData,
// mem_controller: self.mem_controller,
cache_controller: self.cache_controller,
bandwidth,
pending: HashMap::new(),
@@ -155,7 +109,6 @@
}
}

// impl<MC, CC> Base<MC, CC>
impl<CC> Base<CC>
where
CC: CacheAddressTranslation,
@@ -237,7 +190,6 @@ where

// change address to mshr block address
fetch.access.req_size_bytes = self.cache_config.atom_size;
// fetch.data_size = self.cache_config.atom_size();
fetch.access.addr = mshr_addr;

self.mshrs.add(mshr_addr, fetch.clone());
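// Aside, not part of this commit: the `mshrs.add` call above merges misses
// to the same block address, so only the first miss per block goes out to
// memory. A self-contained sketch of that merging idea; `MshrTable` and its
// limits are illustrative stand-ins, not this crate's actual `mshr::Table`.
use std::collections::HashMap;

struct MshrTable<F> {
    entries: HashMap<u64, Vec<F>>, // pending fetches keyed by block address
    max_entries: usize,            // distinct blocks tracked at once
    max_merged: usize,             // fetches merged per block
}

impl<F> MshrTable<F> {
    /// Can a miss to `block_addr` be tracked (fresh entry or merge)?
    fn can_add(&self, block_addr: u64) -> bool {
        match self.entries.get(&block_addr) {
            Some(pending) => pending.len() < self.max_merged,
            None => self.entries.len() < self.max_entries,
        }
    }

    /// Record a miss; a repeat miss to the same block merges into the
    /// existing entry instead of allocating a new one.
    fn add(&mut self, block_addr: u64, fetch: F) {
        self.entries.entry(block_addr).or_default().push(fetch);
    }
}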
@@ -274,13 +226,7 @@
}
}

// impl<I> crate::engine::cycle::Component for Base<I>
// impl crate::engine::cycle::Component for Base
// impl<MC, CC> crate::engine::cycle::Component for Base<MC, CC>
impl<CC> crate::engine::cycle::Component for Base<CC>
// where
// I: ic::MemFetchInterface,
{
impl<CC> crate::engine::cycle::Component for Base<CC> {
/// Sends next request to top memory in the memory hierarchy.
fn cycle(&mut self, cycle: u64) {
let Some(ref top_level_memory_port) = self.top_port else {
@@ -306,7 +252,6 @@ impl<CC> crate::engine::cycle::Component for Base<CC>
} else {
fetch.control_size()
};
// if top_level_memory_port.full(fetch.size(), fetch.is_write()) {
if top_level_memory_port.can_send(&[packet_size]) {
let fetch = self.miss_queue.pop_front().unwrap();
log::debug!(
@@ -329,13 +274,7 @@ impl<CC> crate::engine::cycle::Component for Base<CC>
}
}
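The cycle above peeks at the miss queue and pops only once the port reports capacity. A toy, self-contained version of that check-then-pop pattern; the Port type and its byte budget are assumptions for illustration, not the crate's `ic::Port`.

use std::collections::VecDeque;

struct Port {
    budget: usize, // bytes the port can still accept this cycle
}

impl Port {
    fn can_send(&self, packet_sizes: &[usize]) -> bool {
        packet_sizes.iter().sum::<usize>() <= self.budget
    }
    fn send(&mut self, size: usize) {
        self.budget -= size;
    }
}

fn cycle(port: &mut Port, miss_queue: &mut VecDeque<(u64, usize)>) {
    // peek first: the fetch must stay queued if the port cannot take it
    if let Some(&(addr, size)) = miss_queue.front() {
        if port.can_send(&[size]) {
            let _ = miss_queue.pop_front();
            port.send(size);
            println!("sent fetch {addr:#x} ({size} bytes)");
        }
    }
}

fn main() {
    let mut port = Port { budget: 32 };
    let mut queue = VecDeque::from([(0xdead_beef_u64, 8), (0xcafe_f00d, 40)]);
    cycle(&mut port, &mut queue); // first fetch fits and is sent
    cycle(&mut port, &mut queue); // second exceeds the budget and stays queued
}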

// impl Base
// impl<MC, CC> Base<MC, CC>
impl<CC> Base<CC>
// impl<I> Base<I>
// where
// I: ic::MemFetchInterface,
{
impl<CC> Base<CC> {
/// Checks whether this request can be handled in this cycle.
///
/// `n` equals the number of misses to be handled in this cycle.
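// The body is collapsed in this view. A hedged sketch of what such a check
// typically amounts to, mirroring GPGPU-Sim's `miss_queue_full`; the exact
// bound below is an assumption, not necessarily this method's actual body.
fn miss_queue_can_fit(queue_len: usize, queue_capacity: usize, n: usize) -> bool {
    queue_len + n <= queue_capacity
}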
@@ -465,9 +404,6 @@ where
}
}

// impl<I> super::Bandwidth for Base<I> {
// impl super::Bandwidth for Base {
// impl<MC, CC> super::Bandwidth for Base<MC, CC> {
impl<CC> super::Bandwidth for Base<CC> {
fn has_free_data_port(&self) -> bool {
self.bandwidth.has_free_data_port()
75 changes: 8 additions & 67 deletions src/cache/data.rs
@@ -28,20 +28,12 @@ pub struct Builder<MC, CC> {
/// at the granularity of individual blocks.
/// (the policy used in fermi according to the CUDA manual)
#[derive(Debug)]
// pub struct Data<I> {
pub struct Data<MC, CC> {
// pub inner: cache::base::Base<I>,
// pub inner: cache::base::Base<MC>,
// pub inner: cache::base::Base<mcu::MemoryControllerUnit>,
// pub inner: cache::base::Base<MC, tag_array::Pascal>,
pub inner: cache::base::Base<CC>,

/// Memory controller
pub mem_controller: MC,

/// Cache controller
// pub cache_controller: CC,

/// Specifies type of write allocate request (e.g., L1 or L2)
write_alloc_type: AccessKind,
/// Specifies type of writeback request (e.g., L1 or L2)
@@ -50,33 +42,14 @@ pub struct Data<MC, CC> {

impl<MC, CC> Builder<MC, CC>
where
CC: Clone, // where
// MC: Clone,
// impl Data
// impl<I> Data<I>
// where
// I: ic::MemFetchInterface,
// I: Arc<Mutex<crate::fifo::Fifo<mem_fetch::MemFetch>>>,
CC: Clone,
{
pub fn build(
self, // name: String,
// core_id: usize,
// cluster_id: usize,
// // mem_port: Arc<Mutex<crate::fifo::Fifo<mem_fetch::MemFetch>>>,
// stats: Arc<Mutex<stats::Cache>>,
// config: Arc<config::GPU>,
// cache_config: Arc<config::Cache>,
// write_alloc_type: AccessKind,
// write_back_type: AccessKind,
) -> Data<MC, CC> {
// mem_controller: mcu::MemoryControllerUnit::new(&*config).unwrap(),
// let cache_controller = tag_array::Pascal::new((&*self.cache_config).into());
pub fn build(self) -> Data<MC, CC> {
let inner = super::base::Builder {
name: self.name,
core_id: self.core_id,
cluster_id: self.cluster_id,
stats: self.stats,
// mem_controller: self.mem_controller.clone(),
cache_controller: self.cache_controller,
cache_config: self.cache_config,
}
@@ -97,22 +70,11 @@ impl<MC, CC> Data<MC, CC> {
}
}
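The build above delegates to the base cache's builder and wraps the result. A toy version of that nested-builder pattern; every type and field here is illustrative, not the crate's API.

struct Base {
    name: String,
    miss_queue_size: usize,
}

struct BaseBuilder {
    name: String,
    miss_queue_size: usize,
}

impl BaseBuilder {
    fn build(self) -> Base {
        Base { name: self.name, miss_queue_size: self.miss_queue_size }
    }
}

struct Data {
    inner: Base, // common cache machinery lives in the base part
    write_back: bool,
}

struct DataBuilder {
    name: String,
    miss_queue_size: usize,
    write_back: bool,
}

impl DataBuilder {
    fn build(self) -> Data {
        // construct the inner base cache first, then wrap it
        let inner = BaseBuilder { name: self.name, miss_queue_size: self.miss_queue_size }.build();
        Data { inner, write_back: self.write_back }
    }
}

fn main() {
    let l1 = DataBuilder { name: "L1D".into(), miss_queue_size: 8, write_back: true }.build();
    println!("{}: miss queue {}, write-back {}", l1.inner.name, l1.inner.miss_queue_size, l1.write_back);
}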

// impl Data {
impl<MC, CC> Data<MC, CC>
where
MC: MemoryController,
CC: CacheAddressTranslation,
{
// #[inline]
// pub fn set_top_port(&mut self, port: ic::Port<mem_fetch::MemFetch>) {
// self.inner.set_top_port(port);
// }

// #[must_use]
// pub fn cache_config(&self) -> &Arc<config::Cache> {
// &self.inner.cache_config
// }

/// Write-back hit: mark block as modified.
fn write_hit_write_back(
&mut self,
@@ -499,7 +461,6 @@ where
sector_mask: evicted.sector_mask,
}
.build();
// let control_size = writeback_access.control_size();

// the evicted block may have wrong chip id when advanced L2 hashing
// is used, so set the right chip address from the original mf
@@ -511,15 +472,12 @@ where
tlx_addr.sub_partition = fetch.tlx_addr.sub_partition;

let partition_addr = self
// .inner
.mem_controller
.memory_partition_address(writeback_access.addr);

let writeback_fetch = mem_fetch::Builder {
instr: None,
access: writeback_access,
// &self.inner.config,
// control_size,
warp_id: 0,
core_id: 0,
cluster_id: 0,
@@ -552,10 +510,7 @@ where
) -> cache::RequestStatus {
use cache::config::WriteAllocatePolicy;
let func = match self.inner.cache_config.write_allocate_policy {
WriteAllocatePolicy::NO_WRITE_ALLOCATE => {
// unimplemented!("no write allocate");
Self::write_miss_no_write_allocate
}
WriteAllocatePolicy::NO_WRITE_ALLOCATE => Self::write_miss_no_write_allocate,
WriteAllocatePolicy::WRITE_ALLOCATE => Self::write_miss_write_allocate_naive,
WriteAllocatePolicy::FETCH_ON_WRITE => {
// Self::write_miss_write_allocate_fetch_on_write
@@ -736,23 +691,16 @@ where
}
}
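The write_allocate_policy match above picks a handler as a plain function item and calls it afterwards. A minimal standalone example of that dispatch style; the policy and method names are shortened for illustration.

#[derive(Clone, Copy)]
enum WriteAllocatePolicy {
    NoWriteAllocate,
    WriteAllocate,
}

struct Cache;

impl Cache {
    fn write_miss_no_write_allocate(&mut self, addr: u64) -> String {
        format!("bypass: send the write for {addr:#x} straight to the next level")
    }

    fn write_miss_write_allocate_naive(&mut self, addr: u64) -> String {
        format!("allocate a line for {addr:#x}, then write into it")
    }

    // Both arms coerce to the same function-pointer type, so the handler
    // can be selected first and invoked once afterwards.
    fn write_miss(&mut self, policy: WriteAllocatePolicy, addr: u64) -> String {
        let func = match policy {
            WriteAllocatePolicy::NoWriteAllocate => Self::write_miss_no_write_allocate,
            WriteAllocatePolicy::WriteAllocate => Self::write_miss_write_allocate_naive,
        };
        func(self, addr)
    }
}

fn main() {
    let mut cache = Cache;
    println!("{}", cache.write_miss(WriteAllocatePolicy::WriteAllocate, 0x40));
}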

impl<MC, CC> crate::engine::cycle::Component for Data<MC, CC>
// impl<I> crate::engine::cycle::Component for Data<I>
// where
// I: ic::MemFetchInterface,
{
impl<MC, CC> crate::engine::cycle::Component for Data<MC, CC> {
fn cycle(&mut self, cycle: u64) {
self.inner.cycle(cycle);
}
}

impl<MC, CC> cache::Cache for Data<MC, CC>
where
CC: CacheAddressTranslation, // impl cache::Cache for Data
MC: MemoryController, // impl cache::Cache for Data
// impl<I> cache::Cache for Data<I>
// where
// I: ic::MemFetchInterface + 'static,
CC: CacheAddressTranslation,
MC: MemoryController,
{
fn as_any(&self) -> &dyn std::any::Any {
self
@@ -790,7 +738,6 @@ where

let dbg_fetch = fetch.clone();

// let (cache_index, probe_status) = self
let probe = self
.inner
.tag_array
@@ -800,11 +747,7 @@ where
.unwrap_or(cache::RequestStatus::RESERVATION_FAIL);
// dbg!((cache_index, probe_status));

let access_status = self.process_tag_probe(
is_write, probe, // probe_status,
// cache_index,
addr, fetch, events, time,
);
let access_status = self.process_tag_probe(is_write, probe, addr, fetch, events, time);
// dbg!(&access_status);

log::debug!(
@@ -824,7 +767,7 @@ where
cache::RequestStatus::SECTOR_MISS if access_status != cache::RequestStatus::MISS => {
probe_status
}
_status => access_status,
_ => access_status,
};
let mut stats = self.inner.stats.lock();
stats.inc(
@@ -871,8 +814,6 @@ where
}
}

// impl<I> cache::Bandwidth for Data<I> {
// impl cache::Bandwidth for Data {
impl<MC, CC> cache::Bandwidth for Data<MC, CC> {
fn has_free_data_port(&self) -> bool {
self.inner.has_free_data_port()
