From 71794468679341bcd61602cc314b93dbe8f2ffb0 Mon Sep 17 00:00:00 2001 From: romnnn Date: Tue, 13 Feb 2024 01:03:19 +0100 Subject: [PATCH] exec: implement basic rendering of control flow graphs --- exec/src/cfg.rs | 174 +++++++++++--- exec/src/tracegen.rs | 224 ++++-------------- .../vectoradd/block___0___0___0_warp__0.svg | 51 ++++ 3 files changed, 234 insertions(+), 215 deletions(-) create mode 100644 exec/testing/vectoradd/block___0___0___0_warp__0.svg diff --git a/exec/src/cfg.rs b/exec/src/cfg.rs index 347dcd2d..7c817244 100644 --- a/exec/src/cfg.rs +++ b/exec/src/cfg.rs @@ -2,7 +2,7 @@ use crate::model::{Instruction, ThreadInstruction}; use petgraph::prelude::*; #[derive(Debug)] -pub enum TraceNode { +pub enum ThreadNode { Branch { branch_id: usize, id: usize, @@ -15,7 +15,7 @@ pub enum TraceNode { }, } -impl std::fmt::Display for TraceNode { +impl std::fmt::Display for ThreadNode { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let branch_id = self.branch_id(); let id = self.id(); @@ -32,7 +32,7 @@ impl std::fmt::Display for TraceNode { } } -impl TraceNode { +impl ThreadNode { // #[inline] #[must_use] pub fn branch_id(&self) -> usize { @@ -90,13 +90,13 @@ impl WarpNode { } #[allow(clippy::match_same_arms)] -impl PartialEq for WarpNode { - fn eq(&self, other: &TraceNode) -> bool { +impl PartialEq for WarpNode { + fn eq(&self, other: &ThreadNode) -> bool { match (self, other) { - (Self::Branch { .. }, TraceNode::Branch { .. }) => { + (Self::Branch { .. }, ThreadNode::Branch { .. }) => { self.id() == other.id() && self.branch_id() == other.branch_id() } - (Self::Reconverge { .. }, TraceNode::Reconverge { .. }) => { + (Self::Reconverge { .. }, ThreadNode::Reconverge { .. }) => { self.id() == other.id() && self.branch_id() == other.branch_id() } _ => false, @@ -104,8 +104,28 @@ impl PartialEq for WarpNode { } } -pub type WarpCFG = petgraph::graph::DiGraph; -pub type ThreadCFG = petgraph::graph::DiGraph; +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Edge(bool); + +impl Edge { + pub fn took_branch(&self) -> bool { + self.0 + } + + pub fn taken(&self) -> bool { + self.0 + } +} + +impl std::fmt::Display for Edge { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +pub type WarpCFG = petgraph::graph::DiGraph; +pub type ThreadCFG = petgraph::graph::DiGraph; pub trait UniqueGraph { fn add_unique_edge(&mut self, a: NodeIndex, b: NodeIndex, weight: E) -> EdgeIndex @@ -235,7 +255,7 @@ pub fn build_control_flow_graph( }); let mut last_warp_cfg_node_idx = warp_cfg_root_node_idx; - let thread_cfg_root_node_idx = thread_cfg.add_node(TraceNode::Branch { + let thread_cfg_root_node_idx = thread_cfg.add_node(ThreadNode::Branch { id: 0, // there cannot be more than one source node branch_id: 0, instructions: vec![], @@ -262,17 +282,21 @@ pub fn build_control_flow_graph( id: node_id, branch_id: *branch_id, }); - warp_cfg.add_unique_edge(last_warp_cfg_node_idx, super_node_idx, took_branch); + warp_cfg.add_unique_edge( + last_warp_cfg_node_idx, + super_node_idx, + Edge(took_branch), + ); last_warp_cfg_node_idx = super_node_idx; } { let instructions = std::mem::take(&mut current_instructions); - let node_idx = thread_cfg.add_node(TraceNode::Branch { + let node_idx = thread_cfg.add_node(ThreadNode::Branch { id: node_id, branch_id: *branch_id, instructions, }); - thread_cfg.add_edge(last_thread_cfg_node_idx, node_idx, took_branch); + thread_cfg.add_edge(last_thread_cfg_node_idx, node_idx, Edge(took_branch)); last_thread_cfg_node_idx = node_idx; } } @@ -288,18 +312,22 @@ pub fn build_control_flow_graph( warp_cfg.add_unique_edge( last_warp_cfg_node_idx, super_node_idx, - reconverge_took_branch, + Edge(reconverge_took_branch), ); last_warp_cfg_node_idx = super_node_idx; } { let instructions = std::mem::take(&mut current_instructions); - let node_idx = thread_cfg.add_node(TraceNode::Reconverge { + let node_idx = thread_cfg.add_node(ThreadNode::Reconverge { id: *node_id, branch_id: *branch_id, instructions, }); - thread_cfg.add_edge(last_thread_cfg_node_idx, node_idx, reconverge_took_branch); + thread_cfg.add_edge( + last_thread_cfg_node_idx, + node_idx, + Edge(reconverge_took_branch), + ); last_thread_cfg_node_idx = node_idx; } @@ -319,14 +347,18 @@ pub fn build_control_flow_graph( id: 0, // there cannot be more than one sink node branch_id: 0, }); - warp_cfg.add_unique_edge(last_warp_cfg_node_idx, warp_cfg_sink_node_idx, true); + warp_cfg.add_unique_edge(last_warp_cfg_node_idx, warp_cfg_sink_node_idx, Edge(true)); - let thread_cfg_sink_node_idx = thread_cfg.add_node(TraceNode::Reconverge { + let thread_cfg_sink_node_idx = thread_cfg.add_node(ThreadNode::Reconverge { id: 0, // there cannot be more than one sink node branch_id: 0, instructions: std::mem::take(&mut current_instructions), }); - thread_cfg.add_edge(last_thread_cfg_node_idx, thread_cfg_sink_node_idx, true); + thread_cfg.add_edge( + last_thread_cfg_node_idx, + thread_cfg_sink_node_idx, + Edge(true), + ); ( thread_cfg, (thread_cfg_root_node_idx, thread_cfg_sink_node_idx), @@ -529,6 +561,53 @@ pub mod visit { pub mod render { use std::path::Path; + pub trait Label { + fn label(&self) -> String; + } + + impl Label for crate::cfg::WarpNode { + fn label(&self) -> String { + match self { + crate::cfg::WarpNode::Branch { id, branch_id } => { + format!("BRANCH {branch_id}\n#{id}") + } + crate::cfg::WarpNode::Reconverge { id, branch_id } => { + format!("RECONVERGE {branch_id}\n#{id}") + } + } + } + } + + impl Label for crate::cfg::ThreadNode { + fn label(&self) -> String { + match self { + crate::cfg::ThreadNode::Branch { + id, + branch_id, + instructions, + } => { + format!("BRANCH {branch_id}\n#{id}\n{} instr", instructions.len()) + } + crate::cfg::ThreadNode::Reconverge { + id, + branch_id, + instructions, + } => { + format!( + "RECONVERGE {branch_id}\n#{id}\n{} instr", + instructions.len() + ) + } + } + } + } + + impl Label for crate::cfg::Edge { + fn label(&self) -> String { + format!("took branch = {}", self.0) + } + } + pub trait Render { /// Render graph as an svg image. /// @@ -541,8 +620,8 @@ pub mod render { where D: petgraph::EdgeType, Ix: petgraph::graph::IndexType, - E: std::fmt::Display, - N: std::fmt::Display, + E: Label, + N: Label, { fn render_to(&self, path: impl AsRef) -> Result<(), std::io::Error> { use layout::adt::dag::NodeHandle; @@ -554,9 +633,9 @@ pub mod render { use std::collections::HashMap; use std::io::{BufWriter, Write}; - fn node(node: &N) -> shapes::Element + fn node_circle(node: &N) -> shapes::Element where - N: std::fmt::Display, + N: Label, { let node_style = style::StyleAttr { line_color: Color::new(0x0000_00FF), @@ -567,7 +646,7 @@ pub mod render { }; let size = core::geometry::Point { x: 100.0, y: 100.0 }; shapes::Element::create( - shapes::ShapeKind::Circle(format!("{node}")), + shapes::ShapeKind::Circle(node.label()), node_style, Orientation::TopToBottom, size, @@ -577,24 +656,35 @@ pub mod render { let mut graph = VisualGraph::new(Orientation::TopToBottom); let mut handles: HashMap, NodeHandle> = HashMap::new(); - for edge_idx in self.edge_indices() { - let Some((src_node, dest_node)) = self.edge_endpoints(edge_idx) else { - continue; - }; - let src_handle = *handles - .entry(src_node) - .or_insert_with(|| graph.add_node(node(self.node_weight(src_node).unwrap()))); - - let dest_handle = *handles - .entry(src_node) - .or_insert_with(|| graph.add_node(node(self.node_weight(dest_node).unwrap()))); + // add nodes + for node_idx in self.node_indices() { + let node = self.node_weight(node_idx).unwrap(); + handles + .entry(node_idx) + .or_insert_with(|| graph.add_node(node_circle(node))); + } - let edge_weight = self.edge_weight(edge_idx).unwrap(); + // add edges + for edge_idx in self.edge_indices() { + // let Some((src_node, dest_node)) = self.edge_endpoints(edge_idx) else { + let (src_node_idx, dest_node_idx) = self.edge_endpoints(edge_idx).unwrap(); + // let src_handle = *handles + // .entry(src_node) + // .or_insert_with(|| graph.add_node(node(self.node_weight(src_node).unwrap()))); + // + // let dest_handle = *handles + // .entry(src_node) + // .or_insert_with(|| graph.add_node(node(self.node_weight(dest_node).unwrap()))); + + // let src_handle = handles[&src_node_idx]; + // let dest_handle = handles[&dest_node_idx]; + + let edge = self.edge_weight(edge_idx).unwrap(); let arrow = shapes::Arrow { start: shapes::LineEndKind::None, end: shapes::LineEndKind::Arrow, line_style: style::LineStyleKind::Normal, - text: format!("{edge_weight}"), + text: edge.label(), look: style::StyleAttr { line_color: Color::new(0x0000_00FF), line_width: 2, @@ -605,7 +695,15 @@ pub mod render { src_port: None, dst_port: None, }; - graph.add_edge(arrow, src_handle, dest_handle); + eprintln!( + "edge {} from {:?} to {:?} => {:?} to {:?}", + edge.label(), + src_node_idx, + dest_node_idx, + handles[&src_node_idx], + handles[&dest_node_idx] + ); + graph.add_edge(arrow, handles[&src_node_idx], handles[&dest_node_idx]); } // https://docs.rs/layout-rs/latest/src/layout/backends/svg.rs.html#200 diff --git a/exec/src/tracegen.rs b/exec/src/tracegen.rs index 45939a97..079e6bba 100644 --- a/exec/src/tracegen.rs +++ b/exec/src/tracegen.rs @@ -219,7 +219,7 @@ pub(crate) fn active_threads<'a>( "each node has one incoming edge except the source node" ); let active = match edges.pop() { - Some(edge) => *edge.weight() == took_branch, + Some(edge) => edge.weight().took_branch() == took_branch, None => true, }; assert!(edges.is_empty()); @@ -239,6 +239,8 @@ pub(crate) fn active_threads<'a>( /// On Pascal GTX1080, its 512B. pub const ALIGNMENT_BYTES: u64 = 256; +type WarpTrace = (WarpId, cfg::WarpCFG, [cfg::ThreadCFG; WARP_SIZE]); + impl Tracer { pub async fn run_kernel( self: &Arc, @@ -342,23 +344,16 @@ impl Tracer { block_size: &trace_model::Dim, kernel: &mut K, kernel_launch_id: u64, - ) -> Result< - impl Iterator, - Error, - > + ) -> Result, Error> where K: Kernel + Send + Sync, ::Error: Send, { - // let kernel_launch_id = self.kernel_launch_id.fetch_add(1, atomic::Ordering::SeqCst); - // let kernel_name = kernel.name().unwrap_or("unnamed").to_string(); - self.traced_instructions.lock().unwrap().clear(); self.run_kernel(grid, block_size, kernel, kernel_launch_id) .await; - // let mut trace = Vec::new(); let mut traced_instructions = self.traced_instructions.lock().unwrap(); // check for reconvergence points @@ -568,147 +563,6 @@ impl TraceGenerator for Tracer { let grid = grid.into(); let block_size = block_size.into(); - // self.traced_instructions.lock().unwrap().clear(); - // - // let grid = grid.into(); - // let block_size = block_size.into(); - // self.run_kernel(grid.clone(), block_size.clone(), kernel, kernel_launch_id) - // .await; - // - // let mut traced_instructions = self.traced_instructions.lock().unwrap(); - // - // // check for reconvergence points - // if !traced_instructions.values().all(|warp_instructions| { - // warp_instructions - // .iter() - // .all(|thread_instructions| match thread_instructions.get(0) { - // Some(first_instruction) => { - // *first_instruction == ThreadInstruction::TookBranch(0) - // } - // None => true, - // }) - // }) { - // return Err(Error::Tracer(TraceError::MissingReconvergencePoints)); - // } - // - // // sort warps - // let mut traced_instructions: Vec<_> = traced_instructions.drain().collect(); - // traced_instructions - // .sort_by_key(|(warp, _)| (warp.block_id.accelsim_id(), warp.warp_id_in_block)); - // - // for ( - // WarpId { - // block_id, - // warp_id_in_block, - // }, - // per_thread_instructions, - // ) in traced_instructions.into_iter() - // { - // if log::log_enabled!(log::Level::Debug) { - // let per_thread_instruction_count: Vec<_> = per_thread_instructions - // .iter() - // .map(|per_thread| per_thread.iter().map(|inst| inst.is_access()).count()) - // .collect(); - // let total_thread_instruction_count = - // per_thread_instruction_count.iter().sum::(); - // let mean_thread_instruction_count = - // per_thread_instruction_count.iter().sum::() as f32 - // / per_thread_instruction_count - // .iter() - // .filter(|n| **n > 0) - // .count() as f32; - // - // log::debug!( - // "==> block {:?} warp {:<3} has {} trace instructions ({:.2} per thread)", - // block_id, - // warp_id_in_block, - // total_thread_instruction_count, - // mean_thread_instruction_count, - // ); - // } - // - // let mut warp_cfg = cfg::WarpCFG::new(); - // let warp_cfg_root_node_idx = warp_cfg.add_unique_node(cfg::WarpNode::Branch { - // id: 0, - // branch_id: 0, - // }); - // - // let start = Instant::now(); - // let mut thread_graphs = [(); WARP_SIZE as usize].map(|_| cfg::ThreadCFG::default()); - // for (ti, thread_instructions) in per_thread_instructions.iter().enumerate() { - // let (thread_cfg, (thread_cfg_root_node_idx, thread_cfg_sink_node_idx)) = - // cfg::build_control_flow_graph(thread_instructions, &mut warp_cfg); - // - // #[cfg(debug_assertions)] - // { - // let paths: Vec> = cfg::all_simple_paths( - // &thread_cfg, - // thread_cfg_root_node_idx, - // thread_cfg_sink_node_idx, - // ) - // .collect(); - // - // // each edge connects two distinct nodes, resulting in a - // // single control flow path each thread takes - // debug_assert_eq!(paths.len(), 1); - // log::trace!( - // "thread[{:2}] = {:?}", - // ti, - // cfg::format_control_flow_path(&thread_cfg, &paths[0]).join(" ") - // ); - // } - // - // thread_graphs[ti] = thread_cfg; - // } - // - // if log::log_enabled!(log::Level::Debug) { - // let per_thread_cfg_node_count: Vec<_> = - // thread_graphs.iter().map(|tg| tg.node_count()).collect(); - // log::debug!( - // "==> block {:?} warp {:<3} built thread graphs in {:?} (nodes: mean={:.2} max={} min={})", - // block_id, - // warp_id_in_block, - // start.elapsed(), - // per_thread_cfg_node_count.iter().sum::() as f32 / WARP_SIZE as f32, - // per_thread_cfg_node_count.iter().max().copied().unwrap_or(0), - // per_thread_cfg_node_count.iter().min().copied().unwrap_or(0), - // ); - // } - // - // // fill remaining edges (this should be optional step) - // // cfg::add_missing_control_flow_edges(&mut warp_cfg); - // - // let mut unique_control_flow_path_count: Option = None; - // #[cfg(debug_assertions)] - // if false { - // let warp_cfg_sink_node_idx = warp_cfg - // .find_node(&cfg::WarpNode::Reconverge { - // id: 0, - // branch_id: 0, - // }) - // .unwrap(); - // - // unique_control_flow_path_count = Some( - // super::cfg::all_simple_paths::, _>( - // &warp_cfg, - // warp_cfg_root_node_idx, - // warp_cfg_sink_node_idx, - // ) - // .count(), - // ); - // }; - // - // log::debug!( - // "super CFG: {} nodes, {} edges, {} edge weights, {} unique control flow paths", - // warp_cfg.node_count(), - // warp_cfg.edge_count(), - // warp_cfg.edge_weights().count(), - // unique_control_flow_path_count - // .as_ref() - // .map(ToString::to_string) - // .unwrap_or("?".to_string()), - // ); - let cfg_iter = self .trace_control_flow_graphs(&grid, &block_size, kernel, kernel_launch_id) .await?; @@ -759,11 +613,11 @@ impl TraceGenerator for Tracer { let iter = cfg::visit::DominatedDfs::new(&warp_cfg, warp_cfg_root_node_idx); for (edge_idx, node_idx) in iter { - let took_branch = warp_cfg[edge_idx]; + let edge = warp_cfg[edge_idx]; log::trace!( "trace assembly: node={} took branch={}", warp_cfg[node_idx], - took_branch + edge.took_branch() ); // useful for debugging @@ -779,7 +633,7 @@ impl TraceGenerator for Tracer { // add the instructions let active_threads: Vec<_> = - active_threads(&thread_cfgs, &warp_cfg[node_idx], took_branch).collect(); + active_threads(&thread_cfgs, &warp_cfg[node_idx], edge.took_branch()).collect(); // find longest branch // the length can differ if we have loops with different number of repetitions @@ -1043,6 +897,7 @@ mod tests { use crate::model::MemorySpace; use color_eyre::eyre; use num_traits::Float; + use std::path::PathBuf; use tokio::sync::Mutex; use utils::diff; @@ -1306,11 +1161,11 @@ mod tests { println!("{}", inst); } - // let ref_warp_traces = get_reference_warp_traces("two_level_nested_if_balanced")?; - // let ref_first_warp = &ref_warp_traces[&(trace_model::Dim::ZERO, 0)]; - // for inst in testing::simplify_warp_trace(ref_first_warp) { - // println!("{}", inst); - // } + let ref_warp_traces = get_reference_warp_traces("two_level_nested_if_balanced")?; + let ref_first_warp = &ref_warp_traces[&(trace_model::Dim::ZERO, 0)]; + for inst in fmt::simplify_warp_trace(ref_first_warp, true) { + println!("{}", inst); + } diff::assert_eq!( have: fmt::simplify_warp_trace(first_warp, true).collect::>(), @@ -1381,11 +1236,11 @@ mod tests { println!("{}", inst); } - // let ref_warp_traces = get_reference_warp_traces("two_level_nested_if_balanced")?; - // let ref_first_warp = &ref_warp_traces[&(trace_model::Dim::ZERO, 0)]; - // for inst in testing::simplify_warp_trace(ref_first_warp) { - // println!("{}", inst); - // } + let ref_warp_traces = get_reference_warp_traces("two_level_nested_if_balanced")?; + let ref_first_warp = &ref_warp_traces[&(trace_model::Dim::ZERO, 0)]; + for inst in fmt::simplify_warp_trace(ref_first_warp, true) { + println!("{}", inst); + } diff::assert_eq!( have: fmt::simplify_warp_trace(first_warp, true).collect::>(), @@ -1545,20 +1400,37 @@ mod tests { // &name[..name.len() - 3] // }}; // } - - use std::path::PathBuf; - - pub fn render_graphs(name: &str) { - let testing_dir = testing_dir(); - // - } + // let test = PathBuf::from(file!()).parent().unwrap().to_path_buf(); pub fn testing_dir() -> PathBuf { let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let testing_dir = manifest_dir.join("testing"); - std::fs::create_dir_all(&testing_dir).ok(); testing_dir - // let test = PathBuf::from(file!()).parent().unwrap().to_path_buf(); + } + + pub fn render_graphs( + cfg_iter: impl Iterator, + name: &str, + ) -> Result<(), std::io::Error> { + use crate::cfg::render::Render; + + let graphs_dir = testing_dir().join(name); + std::fs::create_dir_all(&graphs_dir).ok(); + for (warp_id, warp_cfg, _thread_cfgs) in cfg_iter { + // dbg!(&warp_id, &warp_cfg, &thread_cfgs); + let super::WarpId { + block_id, + warp_id_in_block, + } = warp_id; + let name = format!( + "block_{:_>3}_{:_>3}_{:_>3}_warp_{:_>2}", + block_id.x, block_id.y, block_id.z, warp_id_in_block + ); + let graph_path = graphs_dir.join(format!("{}.svg", name)); + dbg!(&graph_path); + warp_cfg.render_to(&graph_path)?; + } + Ok(()) } #[allow(clippy::cast_precision_loss, clippy::cast_sign_loss)] @@ -1672,12 +1544,10 @@ mod tests { let grid: trace_model::Dim = grid_size.into(); let block_size: trace_model::Dim = block_size.into(); - for (warp_id, warp_cfg, thread_cfgs) in tracer + let cfg_iter = tracer .trace_control_flow_graphs(&grid, &block_size, &mut kernel, 0) - .await? - { - dbg!(&warp_id, &warp_cfg, &thread_cfgs); - } + .await?; + render_graphs(cfg_iter, "vectoradd")?; tracer.run_kernel(&grid, &block_size, &mut kernel, 0).await; diff --git a/exec/testing/vectoradd/block___0___0___0_warp__0.svg b/exec/testing/vectoradd/block___0___0___0_warp__0.svg new file mode 100644 index 00000000..1791c8d1 --- /dev/null +++ b/exec/testing/vectoradd/block___0___0___0_warp__0.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + +BRANCH 0#0 +BRANCH 1#0 +RECONVERGE 1#0 +BRANCH 2#0 +RECONVERGE 2#0 +RECONVERGE 0#0took branch = truetook branch = falsetook branch = falsetook branch = truetook branch = truetook branch = false + + + + + + \ No newline at end of file