refactor and lint
romnn committed Aug 7, 2023
1 parent e4786d7 commit 13b504c
Showing 52 changed files with 2,515 additions and 3,532 deletions.
30 changes: 23 additions & 7 deletions WIP.md
@@ -2,22 +2,38 @@

- today:

- test flush caches using config options
- less important:
- todos

- perf: investigate if the many small allocations of msg for move in / move warp etc are problematic
- perf: investigate the performance overhead for finding the allocation ids
- use gpu_mem_alloc for the allocations but still allow smart comparison with play, whose traces do not include allocations

- refactor

- join core and inner core
- flatten ported submodule
- lint
- factor into multiple files
- some minor todos
- remove dead code
- instantiate the entire GPU in one file to find a good API
- factor out traits

- generate plots and correlation stuff etc

- less important:

- fix: investigate lockstep performance and see if we can reduce allocations?
- fix: remove global statics to allow running tests in parallel
- parse accelsim config files (see the sketch below this diff)

- with defaults for compatibility

- test flush caches using config options
- perf: investigate if the many small allocations of msg for move in / move warp etc are problematic
- perf: investigate the performance overhead for finding the allocation ids
- perf: investigate lockstep performance and see if we can reduce allocations?

- allow basic configurations for the playground bridge

-
- FIX: add l2 set index back in
- generate plots and correlation stuff etc

- DONE: multiple memories
- DONE: lockstep with multiple cores and clusters
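For the `parse accelsim config files` item above, here is a minimal sketch of the defaults-for-compatibility idea. It assumes the usual `-option value` line format of `gpgpusim.config` files; the `AccelsimConfig` struct, its fields, and `parse_config` are hypothetical and not part of this repository.

```rust
use std::collections::HashMap;

/// Hypothetical subset of an accelsim-style configuration (not this repo's types).
#[derive(Debug)]
struct AccelsimConfig {
    num_clusters: usize,
    cores_per_cluster: usize,
}

impl Default for AccelsimConfig {
    fn default() -> Self {
        // Placeholder defaults chosen only for illustration.
        Self { num_clusters: 20, cores_per_cluster: 1 }
    }
}

/// Parse `-option value` lines, skipping `#` comments and unknown options,
/// and fall back to the defaults for anything that is missing.
fn parse_config(text: &str) -> AccelsimConfig {
    let mut options: HashMap<&str, &str> = HashMap::new();
    for line in text.lines() {
        let line = line.split('#').next().unwrap_or("").trim();
        if let Some(rest) = line.strip_prefix('-') {
            let mut parts = rest.splitn(2, char::is_whitespace);
            if let (Some(key), Some(value)) = (parts.next(), parts.next()) {
                options.insert(key, value.trim());
            }
        }
    }
    let mut config = AccelsimConfig::default();
    if let Some(n) = options.get("gpgpu_n_clusters").and_then(|v| v.parse().ok()) {
        config.num_clusters = n;
    }
    if let Some(n) = options.get("gpgpu_n_cores_per_cluster").and_then(|v| v.parse().ok()) {
        config.cores_per_cluster = n;
    }
    config
}

fn main() {
    let config = parse_config("-gpgpu_n_clusters 28 # comment\n-unknown_option 1\n");
    println!("{config:?}");
}
```

A real parser would register every known option and report malformed lines; the point here is only that missing options fall back to defaults so that older config files keep working.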
1 change: 1 addition & 0 deletions accelsim/src/stats.rs
@@ -170,6 +170,7 @@ impl TryFrom<Stats> for stats::Stats {
l1c_stats: stats::PerCache::default(),
l1d_stats: stats::PerCache::default(),
l2d_stats,
stall_dram_full: 0, // todo
})
}
}
2 changes: 0 additions & 2 deletions accelsim/src/tracegen/reader.rs
@@ -332,8 +332,6 @@ pub fn parse_trace_instruction(

// parse addresses
if mem_width > 0 {
// let width = super::get_data_width_from_opcode(&opcode)?;

let address_format: usize = parse_decimal(values.pop_front(), "mem address format")?;
let address_format = AddressFormat::from_repr(address_format)
.ok_or_else(|| eyre::eyre!("unknown mem address format: {:?}", address_format))?;
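In the hunk above, `AddressFormat::from_repr` turns the decimal format tag from the trace into an enum variant, and unknown tags become an error. A hand-rolled equivalent might look like the following sketch; the variant names and numeric values are assumptions about the accelsim trace format, not copied from this crate.

```rust
/// Address encodings used in accelsim-style memory traces (assumed values).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AddressFormat {
    ListAll = 0,
    BaseStride = 1,
    BaseDelta = 2,
}

impl AddressFormat {
    /// Map a raw numeric tag to a variant, returning `None` for unknown tags.
    fn from_repr(value: usize) -> Option<Self> {
        match value {
            0 => Some(Self::ListAll),
            1 => Some(Self::BaseStride),
            2 => Some(Self::BaseDelta),
            _ => None,
        }
    }
}

fn main() {
    assert_eq!(AddressFormat::from_repr(1), Some(AddressFormat::BaseStride));
    assert_eq!(AddressFormat::from_repr(9), None);
}
```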
33 changes: 17 additions & 16 deletions benches/vectoradd.rs
@@ -1,3 +1,5 @@
#![allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]

use color_eyre::eyre;
use criterion::{black_box, Criterion};
use validate::materialize::{BenchmarkConfig, Benchmarks};
@@ -21,8 +23,8 @@ fn get_bench_config(benchmark_name: &str, input_idx: usize) -> eyre::Result<Benc
Ok(bench_config.clone())
}

pub fn run_box(bench_config: BenchmarkConfig) -> eyre::Result<()> {
let _stats = validate::simulate::simulate_bench_config(&bench_config)?;
pub fn run_box(bench_config: &BenchmarkConfig) -> eyre::Result<()> {
let _stats = validate::simulate::simulate_bench_config(bench_config)?;
Ok(())
}

@@ -31,8 +33,8 @@ pub async fn run_accelsim(bench_config: BenchmarkConfig) -> eyre::Result<()> {
Ok(())
}

pub fn run_playground(bench_config: BenchmarkConfig) -> eyre::Result<()> {
let _stats = validate::playground::simulate_bench_config(&bench_config);
pub fn run_playground(bench_config: &BenchmarkConfig) -> eyre::Result<()> {
let _stats = validate::playground::simulate_bench_config(bench_config);
Ok(())
}

@@ -48,7 +50,7 @@ pub fn accelsim_benchmark(c: &mut Criterion) {

group.bench_function("vectoradd/10000", |b| {
b.to_async(&runtime)
.iter(|| run_accelsim(black_box(get_bench_config("vectorAdd", 2).unwrap())))
.iter(|| run_accelsim(black_box(get_bench_config("vectorAdd", 2).unwrap())));
});
// group.bench_function("transpose/256/naive", |b| {
// b.iter(|| run_accelsim(black_box(get_bench_config("transpose", 0).unwrap())))
@@ -61,7 +63,7 @@ pub fn play_benchmark(c: &mut Criterion) {
group.sampling_mode(criterion::SamplingMode::Flat);

group.bench_function("vectoradd/10000", |b| {
b.iter(|| run_playground(black_box(get_bench_config("vectorAdd", 2).unwrap())))
b.iter(|| run_playground(&black_box(get_bench_config("vectorAdd", 2).unwrap())));
});
// group.bench_function("transpose/256/naive", |b| {
// b.iter(|| run_playground(black_box(get_bench_config("transpose", 0).unwrap())))
@@ -74,38 +76,37 @@ pub fn box_benchmark(c: &mut Criterion) {
group.sampling_mode(criterion::SamplingMode::Flat);

group.bench_function("vectoradd/10000", |b| {
b.iter(|| run_box(black_box(get_bench_config("vectorAdd", 2).unwrap())))
b.iter(|| run_box(&black_box(get_bench_config("vectorAdd", 2).unwrap())));
});
// group.bench_function("transpose/256/naive", |b| {
// b.iter(|| run_box(black_box(get_bench_config("transpose", 0).unwrap())))
// });
}

criterion::criterion_group!(benches, box_benchmark, play_benchmark, accelsim_benchmark);
criterion::criterion_main!(benches);
// criterion::criterion_main!(benches);

#[allow(dead_code)]
fn custom() -> eyre::Result<()> {
fn main() -> eyre::Result<()> {
use std::time::Instant;

let runtime = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.expect("build tokio runtime");
.build()?;

let mut start = Instant::now();
let _ = run_box(black_box(get_bench_config("transpose", 0)?));
let _ = run_box(&black_box(get_bench_config("transpose", 0)?));
println!("box took:\t\t{:?}", start.elapsed());

start = Instant::now();
let _ = run_playground(black_box(get_bench_config("transpose", 0)?));
let _ = run_playground(&black_box(get_bench_config("transpose", 0)?));
println!("play took:\t\t{:?}", start.elapsed());

start = Instant::now();
let _ = runtime.block_on(async {
let _ = run_accelsim(black_box(get_bench_config("transpose", 0)?)).await?;
runtime.block_on(async {
run_accelsim(black_box(get_bench_config("transpose", 0)?)).await?;
Ok::<(), eyre::Report>(())
});
})?;
println!("accel took:\t\t{:?}", start.elapsed());

Ok(())
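The signature changes above switch `run_box` and `run_playground` from taking `BenchmarkConfig` by value to taking `&BenchmarkConfig`, so each Criterion iteration lends out the config instead of moving it. A standalone sketch of that borrowing pattern is below; `Config` and `run` are placeholders, not this crate's API.

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};

/// Placeholder configuration type standing in for `BenchmarkConfig`.
struct Config {
    size: usize,
}

/// Placeholder workload that only borrows its configuration.
fn run(config: &Config) -> usize {
    (0..config.size).sum()
}

fn bench(c: &mut Criterion) {
    // Build the configuration once, outside the measured closure.
    let config = Config { size: 10_000 };
    c.bench_function("run/10000", |b| {
        // Each iteration borrows `config`; nothing is moved or cloned per run.
        b.iter(|| run(black_box(&config)));
    });
}

criterion_group!(benches, bench);
criterion_main!(benches);
```

Note that in the diff the config is still rebuilt by `get_bench_config` inside every iteration; borrowing only avoids moving it into the benchmarked function.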
115 changes: 58 additions & 57 deletions examples/pycachesim.rs
@@ -1,57 +1,58 @@
#![allow(warnings)]

use casimu::{cache::LRU, Cache, CacheConfig, MainMemory, Simulation};
use std::sync::Arc;

const CACHELINE_SIZE: usize = 64;

fn main() {
let mut mem = MainMemory::new();
let l3 = Arc::new(Cache::new(CacheConfig {
name: "L3".to_string(),
sets: 20480,
ways: 16,
line_size: CACHELINE_SIZE,
replacement_policy: LRU {},
write_back: true,
write_allocate: true,
store_to: None,
load_from: None,
victims_to: None,
swap_on_load: false,
}));
mem.set_load_to(l3.clone());
mem.set_store_from(l3.clone());

let l2 = Arc::new(Cache::new(CacheConfig {
name: "L2".to_string(),
sets: 512,
ways: 8,
line_size: CACHELINE_SIZE,
replacement_policy: LRU {},
write_back: true,
write_allocate: true,
store_to: Some(l3.clone()),
load_from: Some(l3),
victims_to: None,
swap_on_load: false,
}));
let l1 = Arc::new(Cache::new(CacheConfig {
name: "L1".to_string(),
sets: 64,
ways: 8,
line_size: CACHELINE_SIZE,
replacement_policy: LRU {},
write_back: true,
write_allocate: true,
store_to: Some(l2.clone()),
load_from: Some(l2),
victims_to: None,
swap_on_load: false, // incl/excl does not matter in first level
}));

// let mut sim = Simulation::new(l1.clone(), mem);
// sim.load(23)
// cv = CacheVisualizer(cs, [10, 16])
// sim.dump_state()
}
// #![allow(warnings)]
//
// use casimu::{cache::LRU, Cache, CacheConfig, MainMemory, Simulation};
// use std::sync::Arc;
//
// const CACHELINE_SIZE: usize = 64;
//
// fn main() {
// let mut mem = MainMemory::new();
// let l3 = Arc::new(Cache::new(CacheConfig {
// name: "L3".to_string(),
// sets: 20480,
// ways: 16,
// line_size: CACHELINE_SIZE,
// replacement_policy: LRU {},
// write_back: true,
// write_allocate: true,
// store_to: None,
// load_from: None,
// victims_to: None,
// swap_on_load: false,
// }));
// mem.set_load_to(l3.clone());
// mem.set_store_from(l3.clone());
//
// let l2 = Arc::new(Cache::new(CacheConfig {
// name: "L2".to_string(),
// sets: 512,
// ways: 8,
// line_size: CACHELINE_SIZE,
// replacement_policy: LRU {},
// write_back: true,
// write_allocate: true,
// store_to: Some(l3.clone()),
// load_from: Some(l3),
// victims_to: None,
// swap_on_load: false,
// }));
// let l1 = Arc::new(Cache::new(CacheConfig {
// name: "L1".to_string(),
// sets: 64,
// ways: 8,
// line_size: CACHELINE_SIZE,
// replacement_policy: LRU {},
// write_back: true,
// write_allocate: true,
// store_to: Some(l2.clone()),
// load_from: Some(l2),
// victims_to: None,
// swap_on_load: false, // incl/excl does not matter in first level
// }));
//
// // let mut sim = Simulation::new(l1.clone(), mem);
// // sim.load(23)
// // cv = CacheVisualizer(cs, [10, 16])
// // sim.dump_state()
// }
fn main() {}