From 2fb5db0220c02b2c5da204fcc1c9ac6792452956 Mon Sep 17 00:00:00 2001 From: Piotr Magiera <56825108+piotmag769@users.noreply.github.com> Date: Wed, 22 May 2024 18:26:00 +0200 Subject: [PATCH] `--split-generics` flag (#63) --- CHANGELOG.md | 1 + Cargo.lock | 1 + Cargo.toml | 1 + crates/cairo-profiler/Cargo.toml | 1 + crates/cairo-profiler/src/main.rs | 14 ++- crates/cairo-profiler/src/profile_builder.rs | 3 +- crates/cairo-profiler/src/profiler_config.rs | 31 +++++++ crates/cairo-profiler/src/trace_reader.rs | 33 +++---- .../trace_reader/function_trace_builder.rs | 92 ++++++------------- .../src/trace_reader/functions.rs | 60 ++++++++++++ crates/cairo-profiler/tests/e2e.rs | 1 + 11 files changed, 149 insertions(+), 89 deletions(-) create mode 100644 crates/cairo-profiler/src/profiler_config.rs create mode 100644 crates/cairo-profiler/src/trace_reader/functions.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index f8e19de..b54ffc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `--max-function-trace-depth` allowing to specify maximum depth of the function tree in function level profiling +- `--split-generics` flag allowing to differentiate between non-inlined generics monomorphised with different types ## [0.3.0] - 2024-04-20 diff --git a/Cargo.lock b/Cargo.lock index 5adb66b..3ea7e4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -443,6 +443,7 @@ dependencies = [ "flate2", "hex", "itertools 0.11.0", + "lazy_static", "project-root", "prost", "prost-build", diff --git a/Cargo.toml b/Cargo.toml index 6993b6c..2ea20cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ test-case = "3.3.1" itertools = "0.11.0" tempfile = "3.10.1" regex = "1.10" +lazy_static = "1.4.0" cairo-lang-sierra = { git = "https://github.com/starkware-libs/cairo.git", rev = "852f8fb" } cairo-lang-sierra-to-casm = { git = "https://github.com/starkware-libs/cairo.git", rev = "852f8fb" } diff --git a/crates/cairo-profiler/Cargo.toml b/crates/cairo-profiler/Cargo.toml index 64a0e24..7b12ebb 100644 --- a/crates/cairo-profiler/Cargo.toml +++ b/crates/cairo-profiler/Cargo.toml @@ -19,6 +19,7 @@ anyhow.workspace = true itertools.workspace = true tempfile.workspace = true regex.workspace = true +lazy_static.workspace = true trace-data = { path = "../trace-data" } cairo-lang-sierra.workspace = true diff --git a/crates/cairo-profiler/src/main.rs b/crates/cairo-profiler/src/main.rs index 5187d43..5c47031 100644 --- a/crates/cairo-profiler/src/main.rs +++ b/crates/cairo-profiler/src/main.rs @@ -3,6 +3,7 @@ use std::{ io::{Read, Write}, }; +use crate::profiler_config::ProfilerConfig; use crate::sierra_loader::collect_and_compile_all_sierra_programs; use crate::trace_reader::collect_samples_from_trace; use anyhow::{Context, Result}; @@ -15,6 +16,7 @@ use prost::Message; use trace_data::CallTrace; mod profile_builder; +mod profiler_config; mod sierra_loader; mod trace_reader; @@ -36,15 +38,20 @@ struct Cli { /// Specify maximum depth of function tree in function level profiling. /// The is applied per entrypoint - each entrypoint function tree is treated separately. /// Keep in mind recursive functions are also taken into account even though they are later - /// aggregated to one function call. + /// aggregated to a single function call. #[arg(long, default_value_t = 100)] max_function_trace_depth: usize, + + /// Split non-inlined generic functions based on the type they were monomorphised with. + /// E.g. treat `function` as different from `function`. + #[arg(long)] + split_generics: bool, } fn main() -> Result<()> { let cli = Cli::parse(); - let data = fs::read_to_string(cli.path_to_trace_data) + let data = fs::read_to_string(&cli.path_to_trace_data) .context("Failed to read call trace from a file")?; let serialized_trace: CallTrace = serde_json::from_str(&data).context("Failed to deserialize call trace")?; @@ -53,8 +60,7 @@ fn main() -> Result<()> { let samples = collect_samples_from_trace( &serialized_trace, &compiled_artifacts_path_map, - cli.show_details, - cli.max_function_trace_depth, + &ProfilerConfig::from_cli(&cli), )?; let profile = build_profile(&samples); diff --git a/crates/cairo-profiler/src/profile_builder.rs b/crates/cairo-profiler/src/profile_builder.rs index b9bf952..696283b 100644 --- a/crates/cairo-profiler/src/profile_builder.rs +++ b/crates/cairo-profiler/src/profile_builder.rs @@ -10,7 +10,8 @@ use std::collections::{HashMap, HashSet}; pub use perftools::profiles as pprof; -use crate::trace_reader::{ContractCallSample, FunctionName, MeasurementUnit, MeasurementValue}; +use crate::trace_reader::functions::FunctionName; +use crate::trace_reader::{ContractCallSample, MeasurementUnit, MeasurementValue}; #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] pub struct StringId(pub u64); diff --git a/crates/cairo-profiler/src/profiler_config.rs b/crates/cairo-profiler/src/profiler_config.rs new file mode 100644 index 0000000..6e07800 --- /dev/null +++ b/crates/cairo-profiler/src/profiler_config.rs @@ -0,0 +1,31 @@ +use crate::Cli; + +pub struct ProfilerConfig { + pub show_details: bool, + pub max_function_trace_depth: usize, + pub split_generics: bool, +} + +impl ProfilerConfig { + pub fn from_cli(cli: &Cli) -> ProfilerConfig { + ProfilerConfig { + show_details: cli.show_details, + max_function_trace_depth: cli.max_function_trace_depth, + split_generics: cli.split_generics, + } + } +} + +pub struct FunctionLevelConfig { + pub max_function_trace_depth: usize, + pub split_generics: bool, +} + +impl FunctionLevelConfig { + pub fn from_profiler_config(profiler_config: &ProfilerConfig) -> FunctionLevelConfig { + FunctionLevelConfig { + max_function_trace_depth: profiler_config.max_function_trace_depth, + split_generics: profiler_config.split_generics, + } + } +} diff --git a/crates/cairo-profiler/src/trace_reader.rs b/crates/cairo-profiler/src/trace_reader.rs index a7b0fea..11bd1ff 100644 --- a/crates/cairo-profiler/src/trace_reader.rs +++ b/crates/cairo-profiler/src/trace_reader.rs @@ -1,26 +1,19 @@ use core::fmt; -use itertools::Itertools; use std::collections::HashMap; use std::fmt::Display; +use crate::profiler_config::{FunctionLevelConfig, ProfilerConfig}; use crate::sierra_loader::CompiledArtifactsPathMap; use crate::trace_reader::function_trace_builder::collect_profiling_info; +use crate::trace_reader::functions::FunctionName; use anyhow::{Context, Result}; +use itertools::Itertools; use trace_data::{ CallTrace, CallTraceNode, ContractAddress, EntryPointSelector, ExecutionResources, L1Resources, }; mod function_trace_builder; - -#[derive(Clone, Hash, Eq, PartialEq)] -pub struct FunctionName(pub String); - -impl FunctionName { - #[inline] - fn from(entry_point_id: &EntryPointId) -> FunctionName { - FunctionName(format!("{entry_point_id}")) - } -} +pub mod functions; #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct MeasurementUnit(pub String); @@ -155,8 +148,7 @@ impl Display for EntryPointId { pub fn collect_samples_from_trace( trace: &CallTrace, compiled_artifacts_path_map: &CompiledArtifactsPathMap, - show_details: bool, - max_function_trace_depth: usize, + profiler_config: &ProfilerConfig, ) -> Result> { let mut samples = vec![]; let mut current_path = vec![]; @@ -166,8 +158,7 @@ pub fn collect_samples_from_trace( &mut current_path, trace, compiled_artifacts_path_map, - show_details, - max_function_trace_depth, + profiler_config, )?; Ok(samples) } @@ -177,15 +168,14 @@ fn collect_samples<'a>( current_call_stack: &mut Vec, trace: &'a CallTrace, compiled_artifacts_path_map: &CompiledArtifactsPathMap, - show_details: bool, - max_function_trace_depth: usize, + profiler_config: &ProfilerConfig, ) -> Result<&'a ExecutionResources> { current_call_stack.push(EntryPointId::from( trace.entry_point.contract_name.clone(), trace.entry_point.function_name.clone(), trace.entry_point.contract_address.clone(), trace.entry_point.entry_point_selector.clone(), - show_details, + profiler_config.show_details, )); let maybe_entrypoint_steps = if let Some(cairo_execution_info) = &trace.cairo_execution_info { @@ -207,8 +197,8 @@ fn collect_samples<'a>( compiled_artifacts.sierra.get_program_artifact(), &compiled_artifacts.casm_debug_info, compiled_artifacts.sierra.was_run_with_header(), - max_function_trace_depth, - )?; + &FunctionLevelConfig::from_profiler_config(profiler_config), + ); for mut function_stack_trace in profiling_info.functions_stack_traces { let mut function_trace = current_call_stack @@ -239,8 +229,7 @@ fn collect_samples<'a>( current_call_stack, sub_trace, compiled_artifacts_path_map, - show_details, - max_function_trace_depth, + profiler_config, )?; } } diff --git a/crates/cairo-profiler/src/trace_reader/function_trace_builder.rs b/crates/cairo-profiler/src/trace_reader/function_trace_builder.rs index 4e0fb2b..c9cdee7 100644 --- a/crates/cairo-profiler/src/trace_reader/function_trace_builder.rs +++ b/crates/cairo-profiler/src/trace_reader/function_trace_builder.rs @@ -1,11 +1,10 @@ -use crate::trace_reader::FunctionName; -use anyhow::{Context, Result}; +use crate::profiler_config::FunctionLevelConfig; +use crate::trace_reader::functions::{FunctionName, FunctionStackTrace}; use cairo_lang_sierra::extensions::core::{CoreConcreteLibfunc, CoreLibfunc, CoreType}; use cairo_lang_sierra::program::{GenStatement, Program, ProgramArtifact, StatementIdx}; use cairo_lang_sierra::program_registry::ProgramRegistry; use cairo_lang_sierra_to_casm::compiler::CairoProgramDebugInfo; use itertools::{chain, Itertools}; -use regex::Regex; use std::collections::HashMap; use std::ops::AddAssign; use trace_data::TraceEntry; @@ -15,11 +14,6 @@ pub struct ProfilingInfo { pub header_steps: Steps, } -pub struct FunctionStackTrace { - pub stack_trace: Vec, - pub steps: Steps, -} - #[derive(Clone, Copy, Eq, PartialEq)] pub struct Steps(pub usize); @@ -50,8 +44,8 @@ pub fn collect_profiling_info( program_artifact: &ProgramArtifact, casm_debug_info: &CairoProgramDebugInfo, was_run_with_header: bool, - max_function_trace_depth: usize, -) -> Result { + function_level_config: &FunctionLevelConfig, +) -> ProfilingInfo { let program = &program_artifact.program; let sierra_program_registry = &ProgramRegistry::::new(program).unwrap(); @@ -122,8 +116,7 @@ pub fn collect_profiling_info( } }; - let user_function_idx = - user_function_idx_by_sierra_statement_idx(program, sierra_statement_idx); + let user_function_idx = user_function_idx(sierra_statement_idx, program); let Some(gen_statement) = program.statements.get(sierra_statement_idx.0) else { panic!("Failed fetching statement index {}", sierra_statement_idx.0); @@ -136,7 +129,7 @@ pub fn collect_profiling_info( Ok(CoreConcreteLibfunc::FunctionCall(_)) ) { // Push to the stack. - if function_stack_depth < max_function_trace_depth { + if function_stack_depth < function_level_config.max_function_trace_depth { function_stack.push((user_function_idx, current_function_steps)); current_function_steps = Steps(0); } @@ -145,7 +138,7 @@ pub fn collect_profiling_info( } GenStatement::Return(_) => { // Pop from the stack. - if function_stack_depth <= max_function_trace_depth { + if function_stack_depth <= function_level_config.max_function_trace_depth { let current_stack = chain!(function_stack.iter().map(|f| f.0), [user_function_idx]) .collect_vec(); @@ -165,66 +158,41 @@ pub fn collect_profiling_info( } } - if !was_run_with_header { - assert!(header_steps == Steps(0)); - } + let real_functions_stack_traces = functions_stack_traces_steps + .into_iter() + .map(|(idx_stack_trace, steps)| FunctionStackTrace { + stack_trace: to_function_stack_trace(&idx_stack_trace, program), + steps, + }) + .collect_vec(); - let functions_stack_traces = functions_stack_traces_steps + let displayable_functions_stack_traces = real_functions_stack_traces .iter() - .map(|(idx_stack_trace, steps)| { - Ok(FunctionStackTrace { - stack_trace: index_stack_trace_to_function_name_stack_trace( - program, - idx_stack_trace, - )?, - steps: *steps, - }) + .map(|function_stack_trace| { + function_stack_trace + .to_displayable_function_stack_trace(function_level_config.split_generics) }) - .collect::>()?; + .collect_vec(); - let profiling_info = ProfilingInfo { - functions_stack_traces, + ProfilingInfo { + functions_stack_traces: displayable_functions_stack_traces, header_steps, - }; - - Ok(profiling_info) + } } -fn index_stack_trace_to_function_name_stack_trace( - sierra_program: &Program, +fn to_function_stack_trace( idx_stack_trace: &[UserFunctionSierraIndex], -) -> Result> { - let re_loop_func = Regex::new(r"\[expr\d*\]") - .context("Failed to create regex normalising loop functions names")?; - - let re_monomorphization = Regex::new(r"<.*>") - .context("Failed to create regex normalising mononorphised generic functions names")?; - - let stack_with_recursive_functions = idx_stack_trace + sierra_program: &Program, +) -> Vec { + idx_stack_trace .iter() - .map(|idx| { - let sierra_func_name = &sierra_program.funcs[idx.0].id.to_string(); - // Remove suffix in case of loop function e.g. `[expr36]`. - let func_name = re_loop_func.replace(sierra_func_name, ""); - // Remove parameters from monomorphised Cairo generics e.g. ``. - re_monomorphization.replace(&func_name, "").to_string() - }) - .collect_vec(); - - // Consolidate recursive function calls into one function call - they mess up the flame graph. - let mut result = vec![stack_with_recursive_functions[0].clone()]; - for i in 1..stack_with_recursive_functions.len() { - if stack_with_recursive_functions[i - 1] != stack_with_recursive_functions[i] { - result.push(stack_with_recursive_functions[i].clone()); - } - } - - Ok(result.into_iter().map(FunctionName).collect()) + .map(|function_idx| FunctionName(sierra_program.funcs[function_idx.0].id.to_string())) + .collect_vec() } -fn user_function_idx_by_sierra_statement_idx( - sierra_program: &Program, +fn user_function_idx( statement_idx: StatementIdx, + sierra_program: &Program, ) -> UserFunctionSierraIndex { // The `-1` here can't cause an underflow as the statement id of first function's entrypoint is // always 0, so it is always on the left side of the partition, thus the partition index is > 0. diff --git a/crates/cairo-profiler/src/trace_reader/functions.rs b/crates/cairo-profiler/src/trace_reader/functions.rs new file mode 100644 index 0000000..8ade520 --- /dev/null +++ b/crates/cairo-profiler/src/trace_reader/functions.rs @@ -0,0 +1,60 @@ +use crate::trace_reader::function_trace_builder::Steps; +use crate::trace_reader::EntryPointId; +use itertools::Itertools; +use lazy_static::lazy_static; +use regex::Regex; + +lazy_static! { + static ref RE_LOOP_FUNC: Regex = Regex::new(r"\[expr\d*\]") + .expect("Failed to create regex normalising loop functions names"); + static ref RE_MONOMORPHIZATION: Regex = Regex::new(r"<.*>") + .expect("Failed to create regex normalising mononorphised generic functions names"); +} + +#[derive(Clone, Hash, Eq, PartialEq)] +pub struct FunctionName(pub String); + +impl From<&EntryPointId> for FunctionName { + fn from(value: &EntryPointId) -> Self { + FunctionName(format!("{value}")) + } +} + +impl FunctionName { + pub fn to_displayable_function_name(&self, split_generics: bool) -> FunctionName { + // Remove suffix in case of loop function e.g. `[expr36]`. + let func_name = RE_LOOP_FUNC.replace(&self.0, ""); + // Remove parameters from monomorphised Cairo generics e.g. ``. + FunctionName( + if split_generics { + func_name + } else { + RE_MONOMORPHIZATION.replace(&func_name, "") + } + .to_string(), + ) + } +} + +pub struct FunctionStackTrace { + pub stack_trace: Vec, + pub steps: Steps, +} + +impl FunctionStackTrace { + pub fn to_displayable_function_stack_trace(&self, split_generics: bool) -> FunctionStackTrace { + let stack_with_recursive_functions = self + .stack_trace + .iter() + .map(|function_name| function_name.to_displayable_function_name(split_generics)) + .collect_vec(); + + // Consolidate recursive function calls into one function call - they mess up the flame graph. + let displayable_stack_trace = stack_with_recursive_functions.into_iter().dedup().collect(); + + FunctionStackTrace { + stack_trace: displayable_stack_trace, + steps: self.steps, + } + } +} diff --git a/crates/cairo-profiler/tests/e2e.rs b/crates/cairo-profiler/tests/e2e.rs index 9c1f69b..b21b079 100644 --- a/crates/cairo-profiler/tests/e2e.rs +++ b/crates/cairo-profiler/tests/e2e.rs @@ -23,6 +23,7 @@ fn output_path() { assert!(temp_dir.join("my/output/dir/my_file.pb.gz").exists()); } +#[test_case(&["call.json", "--split-generics"]; "with split generics")] #[test_case(&["call.json", "--max-function-trace-depth", "5"]; "with max function trace depth")] #[test_case(&["call.json", "--show-details"]; "with details")] #[test_case(&["call.json"]; "without details")]