diff --git a/Cargo.lock b/Cargo.lock index 18ea0c5..68935fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -648,6 +648,29 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "bindgen" +version = "0.69.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +dependencies = [ + "bitflags 2.4.2", + "cexpr", + "clang-sys", + "itertools", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.48", + "which", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -842,6 +865,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -890,6 +922,17 @@ dependencies = [ "half", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clipboard-win" version = "4.5.0" @@ -1688,6 +1731,12 @@ dependencies = [ "xml-rs", ] +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "glow" version = "0.13.1" @@ -2052,6 +2101,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "legion_prof_viewer" version = "0.3.0" @@ -2067,6 +2122,7 @@ dependencies = [ "getrandom", "itertools", "log", + "nvtxw", "percentage", "rand", "rayon", @@ -2221,6 +2277,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.2" @@ -2310,6 +2372,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num" version = "0.2.1" @@ -2423,6 +2495,16 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "nvtxw" +version = "0.1.0" +source = "git+https://github.com/NVIDIA/NVTX.git?branch=nvtxw-rs#75b51870c83e272b745803f1cfed5c16eb47a261" +dependencies = [ + "bindgen", + "cc", + "static_assertions", +] + [[package]] name = "objc" version = "0.2.7" @@ -2721,6 +2803,16 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "prettyplease" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" +dependencies = [ + "proc-macro2", + "syn 2.0.48", +] + [[package]] 
name = "proc-macro-crate" version = "1.3.1" @@ -2945,6 +3037,12 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.4.0" @@ -3120,6 +3218,12 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -3811,6 +3915,18 @@ dependencies = [ "web-sys", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.31", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 34ab637..ef32449 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ rust-version = "1.74" default = [] client = ["dep:reqwest", "dep:url"] server = ["dep:actix-cors", "dep:actix-web"] +nvtxw = ["dep:nvtxw"] [dependencies] egui = "0.25.0" @@ -53,6 +54,8 @@ url = { version = "2", optional = true } actix-web = { version = "4", optional = true } actix-cors = { version = "0.6", optional = true } +# nvtxw: +nvtxw = { git = "https://github.com/NVIDIA/NVTX.git", branch = "nvtxw-rs", optional = true } # native: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] diff --git a/src/lib.rs b/src/lib.rs index 753c67b..7f6cb2e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,8 @@ pub mod deferred_data; pub mod file_data; pub mod http; pub mod merge_data; +#[cfg(feature = "nvtxw")] 
+pub mod nvtxw;
 #[cfg(not(target_arch = "wasm32"))]
 pub mod parallel_data;
 pub mod timestamp;
diff --git a/src/nvtxw.rs b/src/nvtxw.rs
new file mode 100644
index 0000000..39ead85
--- /dev/null
+++ b/src/nvtxw.rs
@@ -0,0 +1,456 @@
+use std::collections::BTreeMap;
+use std::ffi::{c_char, c_void};
+use std::ffi::{CString, OsString};
+use std::io;
+use std::iter::zip;
+use std::mem::size_of;
+use std::ptr::{null, null_mut};
+
+use nvtxw::nvtxw;
+
+use crate::data::{DataSourceInfo, EntryID, EntryIndex, EntryInfo, SlotMetaTile, SlotTile, TileID};
+use crate::deferred_data::{CountingDeferredDataSource, DeferredDataSource};
+
+/// Root of every domain hierarchy and prefix of every stream name in the export.
+const LEGION_DOMAIN_NAME: &str = "Legion";
+
+/// Exports the slot tiles of a deferred data source through the NVTXW API.
+///
+/// Build one with [`NVTXW::new`], then call [`NVTXW::write`] to run the export.
+pub struct NVTXW<T: DeferredDataSource> {
+    data_source: CountingDeferredDataSource<T>,
+    /// Passed to `nvtxw::initialize_simple`.
+    backend: Option<OsString>,
+    /// Passed to `nvtxw::session_begin_simple`.
+    output: OsString,
+    /// Passed to `nvtxw::session_begin_simple`.
+    force: bool,
+    /// Passed to `nvtxw::session_begin_simple`.
+    merge: Option<OsString>,
+    /// Offset added to every item timestamp before it is written.
+    zero_time: i64,
+}
+
+/// One `(entry ID, long name, domain hierarchy)` element per slot.
+type ResultVec = Vec<(EntryID, String, String)>;
+/// Tiles parked per entry until both the slot tile and its meta tile have arrived.
+type UnmatchedTileHold = BTreeMap<EntryID, (Option<SlotTile>, Option<SlotMetaTile>)>;
+
+/// Collects every slot below `info`, depth first.
+///
+/// Each element holds the slot's entry ID, its long name, and a `/`-separated hierarchy
+/// string that starts with [`LEGION_DOMAIN_NAME`] and ends with the slot's short name.
+/// A panel at level 0 adds no path segment. Summaries produce no element.
+fn walk_entry_list(info: &EntryInfo) -> ResultVec {
+    fn walk(info: &EntryInfo, entry_id: EntryID, result: &mut ResultVec, hierarchy: &str) {
+        match info {
+            EntryInfo::Panel {
+                summary,
+                slots,
+                short_name,
+                ..
+            } => {
+                // Every child of a panel shares one prefix, so build it once.
+                let nested;
+                let child_hierarchy = if entry_id.level() > 0 {
+                    nested = format!("{}/{}", hierarchy, short_name);
+                    nested.as_str()
+                } else {
+                    hierarchy
+                };
+                if let Some(summary) = summary {
+                    walk(summary, entry_id.summary(), result, child_hierarchy);
+                }
+                for (i, slot) in slots.iter().enumerate() {
+                    walk(slot, entry_id.child(i as u64), result, child_hierarchy);
+                }
+            }
+            EntryInfo::Slot {
+                long_name,
+                short_name,
+                ..
+            } => {
+                result.push((
+                    entry_id,
+                    long_name.clone(),
+                    format!("{}/{}", hierarchy, short_name),
+                ));
+            }
+            EntryInfo::Summary { .. } => {
+                // When implementing counters, fill this in.
+            }
+        }
+    }
+
+    let mut result = Vec::new();
+    walk(info, EntryID::root(), &mut result, LEGION_DOMAIN_NAME);
+    result
+}
+
+/// One event as laid out in memory for the static payload schema registered under
+/// [`LEGION_NVTXW_PAYLOAD_SCHEMA_ID`].
+#[repr(C)]
+#[derive(Debug)]
+struct LegionNvtxwEvent {
+    time_start: u64,
+    time_stop: u64,
+    name: *const c_char,
+    /// Packed as `0xAARRGGBB`.
+    color: u32,
+}
+
+// See nvToolsExtPayload.h: nvtxPayloadSchemaAttr_t::schemaId
+// See NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START
+const LEGION_NVTXW_PAYLOAD_SCHEMA_ID: u64 = 0x1c0ffee;
+const LEGION_NVTXW_PAYLOAD_NAME_SCHEMA_ID: u64 = 0x2c0ffee;
+
+/// Adds `zero_time` to a tile timestamp and converts the sum to the unsigned form stored
+/// in [`LegionNvtxwEvent`].
+///
+/// The sum is formed in `i128`, so either operand may be negative as long as the result
+/// fits in `u64`. Returns `None` when it does not.
+fn absolute_timestamp(relative: i64, zero_time: i64) -> Option<u64> {
+    u64::try_from(i128::from(relative) + i128::from(zero_time)).ok()
+}
+
+impl<T: DeferredDataSource> NVTXW<T> {
+    /// Wraps `data_source` in a `CountingDeferredDataSource` and stores the export
+    /// settings used by [`NVTXW::write`].
+    pub fn new(
+        data_source: T,
+        backend: Option<OsString>,
+        output: OsString,
+        force: bool,
+        merge: Option<OsString>,
+        zero_time: i64,
+    ) -> Self {
+        Self {
+            data_source: CountingDeferredDataSource::new(data_source),
+            backend,
+            output,
+            force,
+            merge,
+            zero_time,
+        }
+    }
+
+    /// Takes one pending info response from the data source, if there is one.
+    fn check_info(&mut self) -> Option<DataSourceInfo> {
+        // We requested this once, so we know we'll get zero or one result
+        self.data_source.get_infos().pop()
+    }
+
+    /// Writes every item of `tile` as one event on the stream registered for its entry.
+    ///
+    /// Item `[row][i]` of `tile` is paired with item `[row][i]` of `meta_tile`, which
+    /// supplies the event name.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the two tiles differ in shape, if `streams` has no stream for the tile's
+    /// entry, if a title contains an interior NUL byte, if a shifted timestamp does not
+    /// fit in `u64`, or if the NVTXW write fails.
+    fn write_matched_tile(
+        interface: &nvtxw::InterfaceHandle,
+        streams: &BTreeMap<EntryID, nvtxw::StreamHandle>,
+        zero_time: i64,
+        tile: &SlotTile,
+        meta_tile: &SlotMetaTile,
+    ) {
+        assert!(tile.data.items.len() == meta_tile.data.items.len());
+
+        // All items of a tile go to the same stream; look it up once.
+        let stream = streams[&tile.entry_id];
+
+        for (row, meta_row) in zip(&tile.data.items, &meta_tile.data.items) {
+            assert!(row.len() == meta_row.len());
+
+            for (item, meta_item) in zip(row, meta_row) {
+                let color = item.color;
+
+                // Both payloads below point into this buffer, so it has to outlive the
+                // `event_write` call.
+                let c_name = CString::new(meta_item.title.as_str()).expect("CString::new failed");
+
+                let events = [LegionNvtxwEvent {
+                    time_start: absolute_timestamp(item.interval.start.0, zero_time)
+                        .expect("time_start out of range"),
+                    time_stop: absolute_timestamp(item.interval.stop.0, zero_time)
+                        .expect("time_stop out of range"),
+                    name: c_name.as_ptr(),
+                    color: (0xFF_u32 << 24)
+                        | ((color.r() as u32) << 16)
+                        | ((color.g() as u32) << 8)
+                        | (color.b() as u32),
+                }];
+
+                let payloads = [
+                    nvtxw::PayloadData {
+                        schemaId: LEGION_NVTXW_PAYLOAD_NAME_SCHEMA_ID,
+                        size: usize::MAX,
+                        payload: c_name.as_ptr() as *const c_void,
+                    },
+                    nvtxw::PayloadData {
+                        schemaId: LEGION_NVTXW_PAYLOAD_SCHEMA_ID,
+                        size: size_of::<LegionNvtxwEvent>(),
+                        payload: events.as_ptr() as *const c_void,
+                    },
+                ];
+
+                nvtxw::event_write(interface, stream, &payloads).expect("Failed to write event");
+            }
+        }
+    }
+
+    /// Drains tile responses until at most `num_requests` requests are still outstanding.
+    ///
+    /// Responses are parked in `unmatched_tiles` until both the slot tile and the slot
+    /// meta tile of an entry are present; the pair is then written and removed from the
+    /// map. Entries still missing their counterpart stay in the map for a later call.
+    fn process_events(
+        data_source: &mut CountingDeferredDataSource<T>,
+        interface: &nvtxw::InterfaceHandle,
+        streams: &BTreeMap<EntryID, nvtxw::StreamHandle>,
+        zero_time: i64,
+        unmatched_tiles: &mut UnmatchedTileHold,
+        num_requests: u64,
+    ) {
+        while data_source.outstanding_requests() > num_requests {
+            // When implementing counters, uncomment this.
+            // let summary_tiles = data_source.get_summary_tiles();
+            let slot_tiles = data_source.get_slot_tiles();
+            let slot_meta_tiles = data_source.get_slot_meta_tiles();
+
+            for tile in slot_tiles {
+                let e = tile.entry_id.clone();
+                unmatched_tiles.entry(e).or_default().0 = Some(tile);
+            }
+
+            for meta_tile in slot_meta_tiles {
+                let e = meta_tile.entry_id.clone();
+                unmatched_tiles.entry(e).or_default().1 = Some(meta_tile);
+            }
+
+            // Write out and drop every entry whose pair is now complete.
+            unmatched_tiles.retain(|_entry_id, pair| match pair {
+                (Some(tile), Some(meta_tile)) => {
+                    Self::write_matched_tile(interface, streams, zero_time, tile, meta_tile);
+                    false
+                }
+                _ => true,
+            });
+        }
+    }
+
+    /// Runs the export: starts a session, opens one stream per slot, requests each
+    /// slot's tiles over the full interval, and writes every item as an event.
+    ///
+    /// # Errors
+    ///
+    /// Never returns `Err` at present; every failure panics instead.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the data source reports a non-empty tile set (only dynamic data
+    /// sources are supported for now) or if any NVTXW call fails.
+    pub fn write(mut self) -> io::Result<()> {
+        self.data_source.fetch_info();
+        // Poll until the single info response arrives.
+        let info = loop {
+            if let Some(info) = self.check_info() {
+                break info;
+            }
+        };
+
+        let entry_ids = walk_entry_list(&info.entry_info);
+
+        let full_range_tile_id = TileID(info.interval);
+        let full = true;
+
+        // For now, this only works on dynamic data sources
+        assert!(info.tile_set.tiles.is_empty());
+
+        println!("Exporting to NVTXW");
+
+        let interface = nvtxw::initialize_simple(self.backend).expect("Failed to initialize NVTXW");
+
+        let session = nvtxw::session_begin_simple(&interface, self.output, self.force, self.merge)
+            .expect("Failed to create session");
+
+        let c_event_name = CString::new("Legion Event").expect("CString::new failed");
+
+        let c_field_name_time_start = CString::new("time_start").expect("CString::new failed");
+        let c_field_name_time_stop = CString::new("time_stop").expect("CString::new failed");
+        let c_field_name_name = CString::new("name").expect("CString::new failed");
+        let c_field_name_color = CString::new("color").expect("CString::new failed");
+
+        // C string fields must be specified as their own payload in addition to a field if
+        // their size is dynamic at runtime.
+
+        let name_schema = [nvtxw::PayloadSchemaEntry {
+            flags: nvtxw::NVTX_PAYLOAD_ENTRY_FLAG_EVENT_MESSAGE
+                | nvtxw::NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED,
+            type_: nvtxw::NVTX_PAYLOAD_ENTRY_TYPE_CSTRING,
+            name: c_field_name_name.as_ptr(),
+            description: null(),
+            arrayOrUnionDetail: 0,
+            offset: 0,
+            semantics: null(),
+            reserved: null(),
+        }];
+
+        let name_schema_attr = nvtxw::PayloadSchemaAttr {
+            fieldMask: nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_TYPE
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_FLAGS
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_ENTRIES
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_NUM_ENTRIES
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_SCHEMA_ID,
+            name: null(),
+            type_: nvtxw::NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC,
+            flags: nvtxw::NVTX_PAYLOAD_SCHEMA_FLAG_REFERENCED,
+            entries: name_schema.as_ptr(),
+            numEntries: name_schema.len(),
+            payloadStaticSize: 0,
+            packAlign: 0,
+            schemaId: LEGION_NVTXW_PAYLOAD_NAME_SCHEMA_ID,
+            extension: null_mut(),
+        };
+
+        let event_schema = [
+            nvtxw::PayloadSchemaEntry {
+                flags: nvtxw::NVTX_PAYLOAD_ENTRY_FLAG_RANGE_BEGIN
+                    | nvtxw::NVTX_PAYLOAD_ENTRY_FLAG_EVENT_TIMESTAMP,
+                type_: nvtxw::NVTX_PAYLOAD_ENTRY_TYPE_UINT64,
+                name: c_field_name_time_start.as_ptr(),
+                description: null(),
+                arrayOrUnionDetail: 0,
+                offset: 0,
+                semantics: null(),
+                reserved: null(),
+            },
+            nvtxw::PayloadSchemaEntry {
+                flags: nvtxw::NVTX_PAYLOAD_ENTRY_FLAG_RANGE_END
+                    | nvtxw::NVTX_PAYLOAD_ENTRY_FLAG_EVENT_TIMESTAMP,
+                type_: nvtxw::NVTX_PAYLOAD_ENTRY_TYPE_UINT64,
+                name: c_field_name_time_stop.as_ptr(),
+                description: null(),
+                arrayOrUnionDetail: 0,
+                offset: 0,
+                semantics: null(),
+                reserved: null(),
+            },
+            nvtxw::PayloadSchemaEntry {
+                flags: nvtxw::NVTX_PAYLOAD_ENTRY_FLAG_EVENT_MESSAGE
+                    | nvtxw::NVTX_PAYLOAD_ENTRY_FLAG_POINTER,
+                type_: nvtxw::NVTX_PAYLOAD_ENTRY_TYPE_CSTRING,
+                name: c_field_name_name.as_ptr(),
+                description: null(),
+                arrayOrUnionDetail: 0,
+                offset: 0,
+                semantics: null(),
+                reserved: null(),
+            },
+            nvtxw::PayloadSchemaEntry {
+                flags: 0,
+                type_: nvtxw::NVTX_PAYLOAD_ENTRY_TYPE_COLOR_ARGB,
+                name: c_field_name_color.as_ptr(),
+                description: null(),
+                arrayOrUnionDetail: 0,
+                offset: 0,
+                semantics: null(),
+                reserved: null(),
+            },
+        ];
+
+        let event_schema_attr = nvtxw::PayloadSchemaAttr {
+            fieldMask: nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_NAME
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_TYPE
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_ENTRIES
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_NUM_ENTRIES
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_STATIC_SIZE
+                | nvtxw::NVTX_PAYLOAD_SCHEMA_ATTR_SCHEMA_ID,
+            name: c_event_name.as_ptr(),
+            type_: nvtxw::NVTX_PAYLOAD_SCHEMA_TYPE_STATIC,
+            flags: nvtxw::NVTX_PAYLOAD_SCHEMA_FLAG_NONE,
+            entries: event_schema.as_ptr(),
+            numEntries: event_schema.len(),
+            payloadStaticSize: size_of::<LegionNvtxwEvent>(),
+            packAlign: 0,
+            schemaId: LEGION_NVTXW_PAYLOAD_SCHEMA_ID,
+            extension: null_mut(),
+        };
+
+        let mut streams: BTreeMap<EntryID, nvtxw::StreamHandle> = BTreeMap::new();
+        for (entry_id, long_name, hierarchy) in &entry_ids {
+            let stream_name = format!("{} {}", LEGION_DOMAIN_NAME, long_name);
+            let domain_name = hierarchy.to_string();
+
+            let stream = nvtxw::stream_open_simple(&interface, session, stream_name, domain_name)
+                .expect("Failed to create stream");
+
+            nvtxw::schema_register(&interface, stream, &name_schema_attr)
+                .expect("Failed to register name schema");
+
+            nvtxw::schema_register(&interface, stream, &event_schema_attr)
+                .expect("Failed to register event schema");
+
+            streams.insert(entry_id.clone(), stream);
+        }
+
+        let zero_time = self.zero_time;
+
+        const MAX_IN_FLIGHT_REQUESTS: u64 = 100;
+
+        let mut unmatched_tiles: UnmatchedTileHold = BTreeMap::new();
+
+        for (entry_id, _, _) in &entry_ids {
+            match entry_id.last_index().unwrap() {
+                EntryIndex::Summary => {
+                    // When implementing counters, uncomment this.
+                    /*
+                    self.data_source
+                        .fetch_summary_tile(entry_id, full_range_tile_id, full);
+                    */
+                }
+                EntryIndex::Slot(..) => {
+                    self.data_source
+                        .fetch_slot_tile(entry_id, full_range_tile_id, full);
+                    self.data_source
+                        .fetch_slot_meta_tile(entry_id, full_range_tile_id, full);
+                }
+            }
+
+            // Keep the number of in-flight requests bounded while issuing more.
+            Self::process_events(
+                &mut self.data_source,
+                &interface,
+                &streams,
+                zero_time,
+                &mut unmatched_tiles,
+                MAX_IN_FLIGHT_REQUESTS,
+            );
+        }
+
+        // Wait for everything that is still outstanding.
+        Self::process_events(
+            &mut self.data_source,
+            &interface,
+            &streams,
+            zero_time,
+            &mut unmatched_tiles,
+            0,
+        );
+
+        assert!(unmatched_tiles.is_empty());
+
+        for stream in streams.into_values() {
+            nvtxw::stream_close(&interface, stream).expect("Failed to close stream");
+        }
+
+        nvtxw::session_end(&interface, session).expect("Failed to end session");
+
+        nvtxw::unload(&interface);
+
+        Ok(())
+    }
+}