Commit
cleanup
romnn committed Sep 7, 2023
1 parent a176124 commit 06ac340
Showing 5 changed files with 18 additions and 185 deletions.
5 changes: 2 additions & 3 deletions WIP.md
@@ -2,6 +2,7 @@

- today:

- use gpu_mem_alloc for the allocations but still allow smart comparison with play, whose traces do not include allocations
- xtask task for converting traces? or do that in the validation component

- validate the number of warp instructions for the execution driven frontend and test that.
@@ -19,13 +20,13 @@

- per allocation stats

- fix that cache index unwrapping design
- add config parsing for box
- generate data for the different parallel implementations speedups
- how well does it scale for 20 instead of 80 cores
- convert, match and plot statistics
- record mem fetch latency in playground and box

- DONE: fix that cache index unwrapping design
- DONE: playground stats (should match accelsim)
- DONE: builder for mem access as well
- DONE: consolidate deterministic parallelism, make rayon optional finally
@@ -67,8 +68,6 @@

- todos

- use gpu_mem_alloc for the allocations but still allow smart comparison with play, whose traces do not include allocations

- refactor

- look into: // TODO HOTFIX: workaround
2 changes: 1 addition & 1 deletion playground/sys/src/ref/l2_cache_config.cc
@@ -11,7 +11,7 @@ unsigned l2_cache_config::set_index(new_addr_type addr) const {
if (m_address_mapping) {
// Calculate set index without memory partition bits to reduce set camping
part_addr = m_address_mapping->partition_address(addr);
fmt::println("partition address for addr {} is {}", addr, part_addr);
// fmt::println("partition address for addr {} is {}", addr, part_addr);
}

return cache_config::set_index(part_addr);
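For context on the code above: partition_address strips the memory-partition selection bits out of the address before set indexing, so that addresses striped across partitions do not all camp in the same L2 sets. A minimal Rust sketch of that idea; the bit positions and helper names here are illustrative assumptions, not the simulator's actual address mapping.

// Sketch only: drop assumed partition-select bits before set indexing.
fn partition_address(addr: u64) -> u64 {
    const PART_LO: u32 = 8;   // assumed lowest partition-select bit
    const PART_BITS: u32 = 2; // assumed number of partition-select bits
    let low = addr & ((1u64 << PART_LO) - 1);
    let high = (addr >> (PART_LO + PART_BITS)) << PART_LO;
    high | low
}

fn set_index(part_addr: u64, line_size: u64, num_sets: u64) -> u64 {
    (part_addr / line_size) % num_sets
}

fn main() {
    let addr: u64 = 0x8010_2340;
    let part_addr = partition_address(addr);
    println!("set index for {addr:#x}: {}", set_index(part_addr, 128, 64));
}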
68 changes: 2 additions & 66 deletions src/cache/base.rs
@@ -30,37 +30,24 @@ impl PendingRequest {}
/// Implements common functions for `read_only_cache` and `data_cache`
/// Each subclass implements its own 'access' function
#[derive()]
// pub struct Base {
// pub struct Base<MC, CC> {
pub struct Base<CC> {
pub name: String,
pub core_id: usize,
pub cluster_id: usize,

pub stats: Arc<Mutex<stats::Cache>>,
// pub config: Arc<config::GPU>,
// pub cache_config: Arc<config::Cache>,
pub cache_controller: CC,
// pub cache_controller: tag_array::Pascal,
// pub mem_controller: Box<dyn mcu::MemoryController>,
// pub phantom: std::marker::PhantomData<MC>,
// pub mem_controller: MC,
pub cache_config: cache::Config,

pub miss_queue: VecDeque<mem_fetch::MemFetch>,
pub miss_queue_status: mem_fetch::Status,
pub mshrs: mshr::Table<mem_fetch::MemFetch>,
pub tag_array: tag_array::TagArray<cache::block::Line, CC>,
// pub tag_array: tag_array::TagArray<cache::block::Line, tag_array::Pascal>,
pending: HashMap<mem_fetch::MemFetch, PendingRequest>,
top_port: Option<ic::Port<mem_fetch::MemFetch>>,
// mem_port: Arc<I>,
// Arc<Mutex<crate::fifo::Fifo<mem_fetch::MemFetch>>>,
pub bandwidth: super::bandwidth::Manager,
}

// impl std::fmt::Debug for Base {
// impl<MC, CC> std::fmt::Debug for Base<MC, CC> {
impl<CC> std::fmt::Debug for Base<CC> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("Base")
@@ -72,48 +59,21 @@ impl<CC> std::fmt::Debug for Base<CC> {
}
}

// pub struct CacheConfig {
// // todo: maybe make that a fifo queue
// pub miss_queue_size: usize,
// }

// #[derive(Debug)]
// pub struct RemoveMe {}
//
// impl mcu::MemoryController for RemoveMe {
// fn memory_partition_address(&self, addr: address) -> address {
// 0
// }
// fn to_physical_address(&self, addr: address) -> mcu::TranslatedAddress {
// mcu::TranslatedAddress::default()
// }
// fn num_memory_partitions(&self) -> usize {
// 0
// }
// fn num_memory_sub_partitions(&self) -> usize {
// 0
// }
// }

#[derive(Debug, Clone)]
// pub struct Builder<MC, CC> {
pub struct Builder<CC> {
pub name: String,
pub core_id: usize,
pub cluster_id: usize,
pub stats: Arc<Mutex<stats::Cache>>,
pub cache_controller: CC,
// pub mem_controller: MC,
pub cache_config: Arc<config::Cache>,
}

// impl<MC, CC> Builder<MC, CC> {
impl<CC> Builder<CC>
where
CC: Clone,
{
#[must_use]
// pub fn build(self) -> Base<MC, CC> {
pub fn build(self) -> Base<CC> {
let cache_config = self.cache_config;
let tag_array =
@@ -128,7 +88,6 @@ where
let bandwidth = super::bandwidth::Manager::new(cache_config.clone());

let cache_config = cache::Config::from(&*cache_config);
// let cache_controller = tag_array::Pascal::new(cache_config.clone());

let miss_queue = VecDeque::with_capacity(cache_config.miss_queue_size);

@@ -140,12 +99,7 @@
mshrs,
top_port: None,
stats: self.stats,
// config,
// mem_controller: Box::new(ReplaceMe {}),
cache_config,
// addr_translation,
// phantom: std::marker::PhantomData,
// mem_controller: self.mem_controller,
cache_controller: self.cache_controller,
bandwidth,
pending: HashMap::new(),
@@ -155,7 +109,6 @@
}
}

// impl<MC, CC> Base<MC, CC>
impl<CC> Base<CC>
where
CC: CacheAddressTranslation,
@@ -237,7 +190,6 @@ where

// change address to mshr block address
fetch.access.req_size_bytes = self.cache_config.atom_size;
// fetch.data_size = self.cache_config.atom_size();
fetch.access.addr = mshr_addr;

self.mshrs.add(mshr_addr, fetch.clone());
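// Aside, not part of this commit: the `mshrs.add` call above merges misses
// to the same block address, so only the first miss per block goes out to
// memory. A self-contained sketch of that merging idea; `MshrTable` and its
// limits are illustrative stand-ins, not this crate's actual `mshr::Table`.
use std::collections::HashMap;

struct MshrTable<F> {
    entries: HashMap<u64, Vec<F>>, // pending fetches keyed by block address
    max_entries: usize,            // distinct blocks tracked at once
    max_merged: usize,             // fetches merged per block
}

impl<F> MshrTable<F> {
    /// Can a miss to `block_addr` be tracked (fresh entry or merge)?
    fn can_add(&self, block_addr: u64) -> bool {
        match self.entries.get(&block_addr) {
            Some(pending) => pending.len() < self.max_merged,
            None => self.entries.len() < self.max_entries,
        }
    }

    /// Record a miss; a repeat miss to the same block merges into the
    /// existing entry instead of allocating a new one.
    fn add(&mut self, block_addr: u64, fetch: F) {
        self.entries.entry(block_addr).or_default().push(fetch);
    }
}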
@@ -274,13 +226,7 @@
}
}

// impl<I> crate::engine::cycle::Component for Base<I>
// impl crate::engine::cycle::Component for Base
// impl<MC, CC> crate::engine::cycle::Component for Base<MC, CC>
impl<CC> crate::engine::cycle::Component for Base<CC>
// where
// I: ic::MemFetchInterface,
{
impl<CC> crate::engine::cycle::Component for Base<CC> {
/// Sends next request to top memory in the memory hierarchy.
fn cycle(&mut self, cycle: u64) {
let Some(ref top_level_memory_port) = self.top_port else {
@@ -306,7 +252,6 @@ impl<CC> crate::engine::cycle::Component for Base<CC>
} else {
fetch.control_size()
};
// if top_level_memory_port.full(fetch.size(), fetch.is_write()) {
if top_level_memory_port.can_send(&[packet_size]) {
let fetch = self.miss_queue.pop_front().unwrap();
log::debug!(
@@ -329,13 +274,7 @@ impl<CC> crate::engine::cycle::Component for Base<CC>
}
}
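The cycle above peeks at the miss queue and pops only once the port reports capacity. A toy, self-contained version of that check-then-pop pattern; the Port type and its byte budget are assumptions for illustration, not the crate's `ic::Port`.

use std::collections::VecDeque;

struct Port {
    budget: usize, // bytes the port can still accept this cycle
}

impl Port {
    fn can_send(&self, packet_sizes: &[usize]) -> bool {
        packet_sizes.iter().sum::<usize>() <= self.budget
    }
    fn send(&mut self, size: usize) {
        self.budget -= size;
    }
}

fn cycle(port: &mut Port, miss_queue: &mut VecDeque<(u64, usize)>) {
    // peek first: the fetch must stay queued if the port cannot take it
    if let Some(&(addr, size)) = miss_queue.front() {
        if port.can_send(&[size]) {
            let _ = miss_queue.pop_front();
            port.send(size);
            println!("sent fetch {addr:#x} ({size} bytes)");
        }
    }
}

fn main() {
    let mut port = Port { budget: 32 };
    let mut queue = VecDeque::from([(0xdead_beef_u64, 8), (0xcafe_f00d, 40)]);
    cycle(&mut port, &mut queue); // first fetch fits and is sent
    cycle(&mut port, &mut queue); // second exceeds the budget and stays queued
}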

// impl Base
// impl<MC, CC> Base<MC, CC>
impl<CC> Base<CC>
// impl<I> Base<I>
// where
// I: ic::MemFetchInterface,
{
impl<CC> Base<CC> {
/// Checks whether this request can be handled in this cycle.
///
/// `n` equals the number of misses to be handled in this cycle.
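// The body is collapsed in this view. A hedged sketch of what such a check
// typically amounts to, mirroring GPGPU-Sim's `miss_queue_full`; the exact
// bound below is an assumption, not necessarily this method's actual body.
fn miss_queue_can_fit(queue_len: usize, queue_capacity: usize, n: usize) -> bool {
    queue_len + n <= queue_capacity
}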
@@ -465,9 +404,6 @@ where
}
}

// impl<I> super::Bandwidth for Base<I> {
// impl super::Bandwidth for Base {
// impl<MC, CC> super::Bandwidth for Base<MC, CC> {
impl<CC> super::Bandwidth for Base<CC> {
fn has_free_data_port(&self) -> bool {
self.bandwidth.has_free_data_port()
75 changes: 8 additions & 67 deletions src/cache/data.rs
@@ -28,20 +28,12 @@ pub struct Builder<MC, CC> {
/// at the granularity of individual blocks.
/// (the policy used in fermi according to the CUDA manual)
#[derive(Debug)]
// pub struct Data<I> {
pub struct Data<MC, CC> {
// pub inner: cache::base::Base<I>,
// pub inner: cache::base::Base<MC>,
// pub inner: cache::base::Base<mcu::MemoryControllerUnit>,
// pub inner: cache::base::Base<MC, tag_array::Pascal>,
pub inner: cache::base::Base<CC>,

/// Memory controller
pub mem_controller: MC,

/// Cache controller
// pub cache_controller: CC,

/// Specifies type of write allocate request (e.g., L1 or L2)
write_alloc_type: AccessKind,
/// Specifies type of writeback request (e.g., L1 or L2)
@@ -50,33 +42,14 @@ pub struct Data<MC, CC> {

impl<MC, CC> Builder<MC, CC>
where
CC: Clone, // where
// MC: Clone,
// impl Data
// impl<I> Data<I>
// where
// I: ic::MemFetchInterface,
// I: Arc<Mutex<crate::fifo::Fifo<mem_fetch::MemFetch>>>,
CC: Clone,
{
pub fn build(
self, // name: String,
// core_id: usize,
// cluster_id: usize,
// // mem_port: Arc<Mutex<crate::fifo::Fifo<mem_fetch::MemFetch>>>,
// stats: Arc<Mutex<stats::Cache>>,
// config: Arc<config::GPU>,
// cache_config: Arc<config::Cache>,
// write_alloc_type: AccessKind,
// write_back_type: AccessKind,
) -> Data<MC, CC> {
// mem_controller: mcu::MemoryControllerUnit::new(&*config).unwrap(),
// let cache_controller = tag_array::Pascal::new((&*self.cache_config).into());
pub fn build(self) -> Data<MC, CC> {
let inner = super::base::Builder {
name: self.name,
core_id: self.core_id,
cluster_id: self.cluster_id,
stats: self.stats,
// mem_controller: self.mem_controller.clone(),
cache_controller: self.cache_controller,
cache_config: self.cache_config,
}
@@ -97,22 +70,11 @@ impl<MC, CC> Data<MC, CC> {
}
}
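The build above delegates to the base cache's builder and wraps the result. A toy version of that nested-builder pattern; every type and field here is illustrative, not the crate's API.

struct Base {
    name: String,
    miss_queue_size: usize,
}

struct BaseBuilder {
    name: String,
    miss_queue_size: usize,
}

impl BaseBuilder {
    fn build(self) -> Base {
        Base { name: self.name, miss_queue_size: self.miss_queue_size }
    }
}

struct Data {
    inner: Base, // common cache machinery lives in the base part
    write_back: bool,
}

struct DataBuilder {
    name: String,
    miss_queue_size: usize,
    write_back: bool,
}

impl DataBuilder {
    fn build(self) -> Data {
        // construct the inner base cache first, then wrap it
        let inner = BaseBuilder { name: self.name, miss_queue_size: self.miss_queue_size }.build();
        Data { inner, write_back: self.write_back }
    }
}

fn main() {
    let l1 = DataBuilder { name: "L1D".into(), miss_queue_size: 8, write_back: true }.build();
    println!("{}: miss queue {}, write-back {}", l1.inner.name, l1.inner.miss_queue_size, l1.write_back);
}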

// impl Data {
impl<MC, CC> Data<MC, CC>
where
MC: MemoryController,
CC: CacheAddressTranslation,
{
// #[inline]
// pub fn set_top_port(&mut self, port: ic::Port<mem_fetch::MemFetch>) {
// self.inner.set_top_port(port);
// }

// #[must_use]
// pub fn cache_config(&self) -> &Arc<config::Cache> {
// &self.inner.cache_config
// }

/// Write-back hit: mark block as modified.
fn write_hit_write_back(
&mut self,
@@ -499,7 +461,6 @@ where
sector_mask: evicted.sector_mask,
}
.build();
// let control_size = writeback_access.control_size();

// the evicted block may have wrong chip id when advanced L2 hashing
// is used, so set the right chip address from the original mf
@@ -511,15 +472,12 @@ where
tlx_addr.sub_partition = fetch.tlx_addr.sub_partition;

let partition_addr = self
// .inner
.mem_controller
.memory_partition_address(writeback_access.addr);

let writeback_fetch = mem_fetch::Builder {
instr: None,
access: writeback_access,
// &self.inner.config,
// control_size,
warp_id: 0,
core_id: 0,
cluster_id: 0,
@@ -552,10 +510,7 @@ where
) -> cache::RequestStatus {
use cache::config::WriteAllocatePolicy;
let func = match self.inner.cache_config.write_allocate_policy {
WriteAllocatePolicy::NO_WRITE_ALLOCATE => {
// unimplemented!("no write allocate");
Self::write_miss_no_write_allocate
}
WriteAllocatePolicy::NO_WRITE_ALLOCATE => Self::write_miss_no_write_allocate,
WriteAllocatePolicy::WRITE_ALLOCATE => Self::write_miss_write_allocate_naive,
WriteAllocatePolicy::FETCH_ON_WRITE => {
// Self::write_miss_write_allocate_fetch_on_write
@@ -736,23 +691,16 @@ where
}
}
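The write_allocate_policy match above picks a handler as a plain function item and calls it afterwards. A minimal standalone example of that dispatch style; the policy and method names are shortened for illustration.

#[derive(Clone, Copy)]
enum WriteAllocatePolicy {
    NoWriteAllocate,
    WriteAllocate,
}

struct Cache;

impl Cache {
    fn write_miss_no_write_allocate(&mut self, addr: u64) -> String {
        format!("bypass: send the write for {addr:#x} straight to the next level")
    }

    fn write_miss_write_allocate_naive(&mut self, addr: u64) -> String {
        format!("allocate a line for {addr:#x}, then write into it")
    }

    // Both arms coerce to the same function-pointer type, so the handler
    // can be selected first and invoked once afterwards.
    fn write_miss(&mut self, policy: WriteAllocatePolicy, addr: u64) -> String {
        let func = match policy {
            WriteAllocatePolicy::NoWriteAllocate => Self::write_miss_no_write_allocate,
            WriteAllocatePolicy::WriteAllocate => Self::write_miss_write_allocate_naive,
        };
        func(self, addr)
    }
}

fn main() {
    let mut cache = Cache;
    println!("{}", cache.write_miss(WriteAllocatePolicy::WriteAllocate, 0x40));
}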

impl<MC, CC> crate::engine::cycle::Component for Data<MC, CC>
// impl<I> crate::engine::cycle::Component for Data<I>
// where
// I: ic::MemFetchInterface,
{
impl<MC, CC> crate::engine::cycle::Component for Data<MC, CC> {
fn cycle(&mut self, cycle: u64) {
self.inner.cycle(cycle);
}
}

impl<MC, CC> cache::Cache for Data<MC, CC>
where
CC: CacheAddressTranslation, // impl cache::Cache for Data
MC: MemoryController, // impl cache::Cache for Data
// impl<I> cache::Cache for Data<I>
// where
// I: ic::MemFetchInterface + 'static,
CC: CacheAddressTranslation,
MC: MemoryController,
{
fn as_any(&self) -> &dyn std::any::Any {
self
@@ -790,7 +738,6 @@ where

let dbg_fetch = fetch.clone();

// let (cache_index, probe_status) = self
let probe = self
.inner
.tag_array
@@ -800,11 +747,7 @@ where
.unwrap_or(cache::RequestStatus::RESERVATION_FAIL);
// dbg!((cache_index, probe_status));

let access_status = self.process_tag_probe(
is_write, probe, // probe_status,
// cache_index,
addr, fetch, events, time,
);
let access_status = self.process_tag_probe(is_write, probe, addr, fetch, events, time);
// dbg!(&access_status);

log::debug!(
@@ -824,7 +767,7 @@ where
cache::RequestStatus::SECTOR_MISS if access_status != cache::RequestStatus::MISS => {
probe_status
}
_status => access_status,
_ => access_status,
};
let mut stats = self.inner.stats.lock();
stats.inc(
@@ -871,8 +814,6 @@ where
}
}

// impl<I> cache::Bandwidth for Data<I> {
// impl cache::Bandwidth for Data {
impl<MC, CC> cache::Bandwidth for Data<MC, CC> {
fn has_free_data_port(&self) -> bool {
self.inner.has_free_data_port()
