Skip to content

Commit

Permalink
exec: trace barrier instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
romnn committed Feb 12, 2024
1 parent 290ab8b commit 0ac0aa1
Show file tree
Hide file tree
Showing 9 changed files with 153 additions and 80 deletions.
5 changes: 3 additions & 2 deletions benchmarks/src/matrixmul.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use gpucachesim::exec::{
alloc,
model::{Dim, MemorySpace},
tracegen::{TraceGenerator, Tracer},
tracegen::{self, TraceGenerator, Tracer},
Kernel, ThreadBlock, ThreadIndex,
};
use num_traits::{Float, Zero};
Expand Down Expand Up @@ -255,8 +255,9 @@ where
shared_mem_b: Mutex::new(shared_mem_b),
num_rows,
};
let options = tracegen::Options::default();
let trace = tracer
.trace_kernel(grid_dim, block_dim, &mut kernel)
.trace_kernel(grid_dim, block_dim, &mut kernel, &options)
.await?;
Ok((tracer.commands().await, vec![trace]))
}
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/src/pchase.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

use gpucachesim::exec::{
alloc,
tracegen::{TraceGenerator, Tracer},
tracegen::{self, TraceGenerator, Tracer},
Kernel, MemorySpace, ThreadBlock, ThreadIndex,
};
// use num_traits::{Float, Zero};

use tokio::sync::Mutex;

Expand Down Expand Up @@ -119,8 +118,9 @@ pub async fn pchase(
};
let grid_size = 1;
let block_size = 1;
let options = tracegen::Options::default();
let trace = tracer
.trace_kernel(grid_size, block_size, &mut kernel)
.trace_kernel(grid_size, block_size, &mut kernel, &options)
.await?;
traces.push(trace);
Ok((tracer.commands().await, traces))
Expand Down
5 changes: 3 additions & 2 deletions benchmarks/src/simple_matrixmul.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use gpucachesim::exec::{
alloc,
model::{Dim, MemorySpace},
tracegen::{TraceGenerator, Tracer},
tracegen::{self, TraceGenerator, Tracer},
Kernel, ThreadBlock, ThreadIndex,
};
use num_traits::{Float, NumCast, Zero};
Expand Down Expand Up @@ -175,8 +175,9 @@ where
n,
p,
};
let options = tracegen::Options::default();
let trace = tracer
.trace_kernel(grid_dim, block_dim, &mut kernel)
.trace_kernel(grid_dim, block_dim, &mut kernel, &options)
.await?;
Ok((tracer.commands().await, vec![trace]))
}
Expand Down
14 changes: 12 additions & 2 deletions benchmarks/src/transpose.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use color_eyre::eyre;
use gpucachesim::exec::{
alloc,
model::{Dim, MemorySpace},
tracegen::{TraceGenerator, Tracer},
tracegen::{self, TraceGenerator, Tracer},
Kernel,
};
use num_traits::{Float, NumCast, Zero};
Expand Down Expand Up @@ -117,6 +117,7 @@ pub mod coalesced {

let tile_idx = (tid.thread_idx.y + i) * TILE_DIM + tid.thread_idx.x;
let mat_idx = index_in + i as usize * self.cols;
dbg!(mat_idx);
tiles[(tid, tile_idx as usize)] = dev_mat[(tid, mat_idx)];
i += BLOCK_ROWS;
}
Expand Down Expand Up @@ -239,6 +240,7 @@ where
Debug, Clone, Copy, Hash, strum::EnumString, PartialEq, Eq, PartialOrd, Ord, serde::Deserialize,
)]
#[serde(rename_all = "lowercase")]
#[strum(ascii_case_insensitive)]
pub enum Variant {
Naive,
Coalesced,
Expand Down Expand Up @@ -401,8 +403,11 @@ where
rows,
cols,
};
// run once
let mut result =
transpose::<T, coalesced::Transpose<T>>(&tracer, rows, cols, &mut kernel).await?;

// run more times
for _ in 0..repetitions {
let (commands, traces) =
transpose::<T, coalesced::Transpose<T>>(&tracer, rows, cols, &mut kernel).await?;
Expand Down Expand Up @@ -506,7 +511,12 @@ where
assert!(grid_dim.y > 0);
assert!(grid_dim.z > 0);

let trace = tracer.trace_kernel(grid_dim, block_dim, kernel).await?;
let options = tracegen::Options {
no_data_dependency: true,
};
let trace = tracer
.trace_kernel(grid_dim, block_dim, kernel, &options)
.await?;
Ok((tracer.commands().await, vec![trace]))
}

Expand Down
5 changes: 3 additions & 2 deletions benchmarks/src/vectoradd.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use gpucachesim::exec::tracegen::{TraceGenerator, Tracer};
use gpucachesim::exec::tracegen::{self, TraceGenerator, Tracer};
use gpucachesim::exec::{alloc, Kernel, MemorySpace, ThreadBlock, ThreadIndex};
use num_traits::{Float, Zero};

Expand Down Expand Up @@ -123,8 +123,9 @@ where
dev_result: Mutex::new(dev_result),
n,
};
let options = tracegen::Options::default();
let trace = tracer
.trace_kernel(grid_size, BLOCK_SIZE, &mut kernel)
.trace_kernel(grid_size, BLOCK_SIZE, &mut kernel, &options)
.await?;
Ok((tracer.commands().await, vec![trace]))
}
Expand Down
15 changes: 9 additions & 6 deletions exec/src/cfg.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
use crate::model::{MemInstruction, ThreadInstruction};
use crate::model::{Instruction, MemInstruction, ThreadInstruction};
use petgraph::prelude::*;

#[derive(Debug)]
pub enum TraceNode {
Branch {
branch_id: usize,
id: usize,
instructions: Vec<MemInstruction>,
instructions: Vec<Instruction>,
},
Reconverge {
branch_id: usize,
id: usize,
instructions: Vec<MemInstruction>,
instructions: Vec<Instruction>,
},
}

Expand Down Expand Up @@ -51,7 +51,7 @@ impl TraceNode {

// #[inline]
#[must_use]
pub fn instructions(&self) -> &[MemInstruction] {
pub fn instructions(&self) -> &[Instruction] {
match self {
Self::Branch { instructions, .. } | Self::Reconverge { instructions, .. } => {
instructions
Expand Down Expand Up @@ -240,7 +240,7 @@ pub fn build_control_flow_graph(
instructions: vec![],
});
let mut last_thread_cfg_node_idx = thread_cfg_root_node_idx;
let mut current_instructions = Vec::new();
let mut current_instructions: Vec<Instruction> = Vec::new();
let mut branch_taken: HashMap<usize, bool> = HashMap::new();

let mut unique_node_ids: HashMap<usize, usize> = HashMap::new();
Expand Down Expand Up @@ -305,7 +305,10 @@ pub fn build_control_flow_graph(
*node_id += 1;
}
ThreadInstruction::Access(access) => {
current_instructions.push(access.clone());
current_instructions.push(Instruction::Memory(access.clone()));
}
ThreadInstruction::Barrier => {
current_instructions.push(Instruction::Barrier);
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions exec/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ pub struct ThreadBlock {

impl ThreadBlock {
/// Synchronizes this thread at the block barrier and traces the barrier.
///
/// A [`model::ThreadInstruction::Barrier`] is pushed for `self.thread_id`
/// through `self.memory` first, and only then is `self.barrier.wait()`
/// awaited, so the barrier is recorded in the thread's instruction stream
/// before the thread suspends.
pub async fn synchronize_threads(&self) {
    // Record the barrier before waiting so it precedes anything this thread
    // does after the synchronization point.
    self.memory
        .push_thread_instruction(&self.thread_id, model::ThreadInstruction::Barrier);
    self.barrier.wait().await;
}

Expand Down
14 changes: 11 additions & 3 deletions exec/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ pub enum MemAccessKind {
Store,
}

/// Warp instruction
#[derive(Debug, Clone, Ord, PartialOrd)]
/// Memory instruction
#[derive(Debug, Clone, Hash, Ord, PartialOrd)]
pub struct MemInstruction {
pub mem_space: MemorySpace,
pub kind: MemAccessKind,
Expand All @@ -58,10 +58,18 @@ impl PartialEq for MemInstruction {
}
}

/// Warp instruction
/// Instruction stored in the instruction list of a control flow graph node.
///
/// Either a memory access or a barrier. The variant order matters: the
/// derived `Ord`/`PartialOrd` compare variants in declaration order, so
/// every `Memory(_)` sorts before `Barrier`.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
pub enum Instruction {
/// A memory access, wrapping the traced [`MemInstruction`].
Memory(MemInstruction),
/// A barrier, emitted for a traced `ThreadInstruction::Barrier`.
Barrier,
}

/// Thread instruction
#[derive(Debug, Clone, PartialOrd, Ord)]
pub enum ThreadInstruction {
Access(MemInstruction),
Barrier,
Nop,
Branch(usize),
TookBranch(usize),
Expand Down
Loading

0 comments on commit 0ac0aa1

Please sign in to comment.