Skip to content

Commit

Permalink
nondeterministic
Browse files Browse the repository at this point in the history
  • Loading branch information
romnn committed Aug 31, 2023
1 parent 72c5433 commit 6b2f596
Show file tree
Hide file tree
Showing 21 changed files with 3,141 additions and 1,189 deletions.
30 changes: 30 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ publish = false

[features]
# default = ["parking_lot"]
# default = ["stats", "parking_lot", "parallel"]
default = ["stats", "parallel"]
default = ["stats", "parking_lot", "parallel"]
# default = ["stats", "parking_lot", "parallel", "deadlock_detection"]
# default = ["stats", "parallel"]
# default = ["stats", "parking_lot"]
deadlock_detection = ["parking_lot/deadlock_detection"]
timings = []
stats = []
parallel = []
# parallel = ["dep:rayon"]
Expand Down Expand Up @@ -91,7 +94,7 @@ ndarray = "0"
flume = "0"
crossbeam = "0"
num_cpus = "1"
parking_lot = "0"
parking_lot = { version = "0", features = ["deadlock_detection"] }

# tracing
tracing = "0"
Expand Down
8 changes: 6 additions & 2 deletions WIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@

- easy chores:

- publish python package to pip

- today:

- record mem fetch latency in playground and box
- write trait for tag array
- rename crates and github repo
- publish to crates.io
- publish python package to pip

- today:
- TODO:

- write a direct-connection interconnect trait that replaces the FIFOs etc.
- check statistical error for the nondeterministic version
Expand Down
89 changes: 61 additions & 28 deletions benches/vectoradd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ fn get_bench_config(benchmark_name: &str, input_idx: usize) -> eyre::Result<Benc
Ok(bench_config.clone())
}

pub fn run_box(mut bench_config: BenchmarkConfig) -> eyre::Result<stats::Stats> {
bench_config.simulate.parallel =
std::env::var("PARALLEL").unwrap_or_default().to_lowercase() == "yes";
println!("parallel: {}", bench_config.simulate.parallel);
let stats = validate::simulate::simulate_bench_config(&bench_config)?;
pub fn run_box(mut bench_config: BenchmarkConfig, serial: bool) -> eyre::Result<stats::Stats> {
bench_config.simulate.parallel = !serial;
// if std::env::var("PARALLEL").unwrap_or_default().to_lowercase() == "yes";
// println!("parallel: {}", bench_config.simulate.parallel);
let sim = validate::simulate::simulate_bench_config(&bench_config)?;
// fast parallel: cycle loop time: 558485 ns
// serial: cycle loop time: 2814591 ns (speedup 5x)
// have 80 cores and 16 threads
Expand All @@ -51,6 +51,7 @@ pub fn run_box(mut bench_config: BenchmarkConfig) -> eyre::Result<stats::Stats>
// println!("{name} time: {ms:.5} ms ({percent:>2.2}%)");
// }
// println!();
let stats = sim.stats();
Ok(stats)
}

Expand Down Expand Up @@ -102,7 +103,7 @@ pub fn box_benchmark(c: &mut Criterion) {
group.sampling_mode(criterion::SamplingMode::Flat);

group.bench_function("vectoradd/10000", |b| {
b.iter(|| run_box(black_box(get_bench_config("vectorAdd", 2).unwrap())));
b.iter(|| run_box(black_box(get_bench_config("vectorAdd", 2).unwrap()), true));
});
// group.bench_function("transpose/256/naive", |b| {
// b.iter(|| run_box(black_box(get_bench_config("transpose", 0).unwrap())))
Expand Down Expand Up @@ -151,7 +152,7 @@ fn main() -> eyre::Result<()> {
.build()?;

let start = Instant::now();
let stats = run_box(black_box(get_bench_config(bench_name, input_num)?))?;
let stats = run_box(black_box(get_bench_config(bench_name, input_num)?), false)?;
dbg!(&stats.sim);
let box_dur = start.elapsed();
println!("box took:\t\t{box_dur:?}");
Expand All @@ -161,34 +162,66 @@ fn main() -> eyre::Result<()> {
return Ok(());
}

let timings = casimu::TIMINGS.lock();
println!("sorted by NAME");
for (name, dur) in timings.iter().sorted_by_key(|(&name, _dur)| name) {
println!(
"\t{name:<30}: {:>6.5} ms avg ({:>2.6} sec total)",
dur.mean().as_secs_f64() * 1000.0,
dur.total().as_secs_f64(),
);
{
let timings = casimu::TIMINGS.lock();
println!("sorted by NAME");
for (name, dur) in timings.iter().sorted_by_key(|(&name, _dur)| name) {
println!(
"\t{name:<30}: {:>6.5} ms avg ({:>2.6} sec total)",
dur.mean().as_secs_f64() * 1000.0,
dur.total().as_secs_f64(),
);
}
println!();
println!("sorted by TOTAL DURATION");
for (name, dur) in timings.iter().sorted_by_key(|(_name, dur)| dur.total()) {
println!(
"\t{name:<30}: {:>6.5} ms avg ({:>2.6} sec total)",
dur.mean().as_secs_f64() * 1000.0,
dur.total().as_secs_f64(),
);
}
println!();
}
println!();
println!("sorted by TOTAL DURATION");
for (name, dur) in timings.iter().sorted_by_key(|(_name, dur)| dur.total()) {
println!(
"\t{name:<30}: {:>6.5} ms avg ({:>2.6} sec total)",
dur.mean().as_secs_f64() * 1000.0,
dur.total().as_secs_f64(),
);

// clear timing measurements
casimu::TIMINGS.lock().clear();

let start = Instant::now();
let stats = run_box(black_box(get_bench_config(bench_name, input_num)?), true)?;
dbg!(&stats.sim);
let serial_box_dur = start.elapsed();
println!("serial box took:\t\t{serial_box_dur:?}");
println!(
"speedup is :\t\t{:.2}",
serial_box_dur.as_secs_f64() / box_dur.as_secs_f64()
);
{
let timings = casimu::TIMINGS.lock();
println!("sorted by NAME");
for (name, dur) in timings.iter().sorted_by_key(|(&name, _dur)| name) {
println!(
"\t{name:<30}: {:>6.5} ms avg ({:>2.6} sec total)",
dur.mean().as_secs_f64() * 1000.0,
dur.total().as_secs_f64(),
);
}
println!();
println!("sorted by TOTAL DURATION");
for (name, dur) in timings.iter().sorted_by_key(|(_name, dur)| dur.total()) {
println!(
"\t{name:<30}: {:>6.5} ms avg ({:>2.6} sec total)",
dur.mean().as_secs_f64() * 1000.0,
dur.total().as_secs_f64(),
);
}
println!();
}
println!();

let start = Instant::now();
run_playground(&black_box(get_bench_config(bench_name, input_num)?))?;
let play_dur = start.elapsed();
println!("play took:\t\t{play_dur:?}");
println!(
"speedup is :\t\t{:.2}",
play_dur.as_secs_f64() / box_dur.as_secs_f64()
);

let start = Instant::now();
runtime.block_on(async {
Expand Down
3 changes: 3 additions & 0 deletions playground/sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ fn build_config_parser_in_source() -> eyre::Result<()> {
let mut build = cc::Build::new();
build
.cpp(true)
.pic(true)
.static_flag(true)
.warnings(false)
.files(parser_sources);
Expand Down Expand Up @@ -254,6 +255,7 @@ fn build_config_parser() -> eyre::Result<PathBuf> {
let mut build = cc::Build::new();
build
.cpp(true)
.pic(true)
.static_flag(true)
.include("./src/ref/intersim2/")
.warnings(false)
Expand Down Expand Up @@ -282,6 +284,7 @@ fn build_spdlog(
let mut build = cc::Build::new();
build
.cpp(true)
.pic(true)
.static_flag(true)
.warnings(false)
.include(include_dir)
Expand Down
3 changes: 0 additions & 3 deletions src/cache/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,6 @@ impl crate::engine::cycle::Component for Base
{
/// Sends next request to top memory in the memory hierarchy.
fn cycle(&mut self, cycle: u64) {


let Some(ref top_level_memory_port) = self.top_port else {
panic!("missing top port");
return;
Expand Down Expand Up @@ -322,7 +320,6 @@ impl crate::engine::cycle::Component for Base
data: fetch,
time: cycle,
});
// top_level_memory_port.send(fetch, cycle);
}
}
// let _data_port_busy = !self.has_free_data_port();
Expand Down
6 changes: 4 additions & 2 deletions src/cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ where
// // sim.select_kernel().map(Arc::clone);
// unimplemented!("concurrent kernel sm");
// } else {
let mut current_kernel = core.current_kernel.try_lock().clone();
let mut current_kernel: Option<Arc<_>> =
core.current_kernel.try_lock().as_ref().map(Arc::clone);
let should_select_new_kernel = if let Some(ref current) = current_kernel {
// if no more blocks left, get new kernel once current block completes
current.no_more_blocks_to_run() && core.not_completed() == 0
Expand All @@ -247,7 +248,7 @@ where

// dbg!(&should_select_new_kernel);
if should_select_new_kernel {
current_kernel = crate::timeit!(sim.select_kernel());
current_kernel = sim.select_kernel();
// current_kernel = sim.select_kernel();
// if let Some(ref k) = current_kernel {
// log::debug!("kernel {} bind to core {:?}", kernel, self.id());
Expand All @@ -273,6 +274,7 @@ where
drop(core);
if can_issue {
let mut core = self.cores[core_id].write();
// println!("SERIAL issue to {:?}", core.id());
core.issue_block(&kernel, cycle);
num_blocks_issued += 1;
*block_issue_next_core = core_id;
Expand Down
Loading

0 comments on commit 6b2f596

Please sign in to comment.