Skip to content

Commit

Permalink
Mem plan only once before profiling + Print CPU-Accelerator time diff
Browse files Browse the repository at this point in the history
  • Loading branch information
LouisChourakiSonos committed Dec 4, 2024
1 parent 474ce79 commit 27ea988
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 27 deletions.
10 changes: 8 additions & 2 deletions libcli/src/annotations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,13 @@ impl<'a> std::ops::Add<&'a NodeTags> for &'a NodeTags {

let profile = self.profile.unwrap_or_default() + other.profile.unwrap_or_default();
let profile = if profile != Duration::default() { Some(profile) } else { None };
let accelerator_profile = self.accelerator_profile.unwrap_or_default() + other.accelerator_profile.unwrap_or_default();
let accelerator_profile = if accelerator_profile != Duration::default() { Some(accelerator_profile) } else { None };
let accelerator_profile = self.accelerator_profile.unwrap_or_default()
+ other.accelerator_profile.unwrap_or_default();
let accelerator_profile = if accelerator_profile != Duration::default() {
Some(accelerator_profile)
} else {
None
};

let style = self.style.or(other.style);
let labels = self.labels.iter().chain(other.labels.iter()).cloned().collect();
Expand Down Expand Up @@ -275,6 +280,7 @@ impl Annotations {
/// Aggregate timing figures produced by a profiling run.
///
/// Built at the end of `profile()` from the per-node tags and the timing of
/// the benchmark loop, then read by the terminal summary rendering.
pub struct ProfileSummary {
/// Largest per-node `profile` duration found among the node tags.
pub max: Duration,
/// Sum of the per-node `profile` durations over all node tags.
pub sum: Duration,
/// Sum of the per-node `accelerator_profile` durations over all node tags;
/// nodes without an accelerator measurement contribute nothing.
pub accel_sum: Duration,
/// Elapsed time measured around the profiling loop. On the non-Metal path
/// the time accounted by inner nodes is subtracted from it.
pub entire: Duration,
/// Iteration counter of the profiling loop, incremented once per pass.
pub iters: usize,
}
Expand Down
2 changes: 1 addition & 1 deletion libcli/src/display_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub struct DisplayParams {
pub json: bool,
pub info: bool,
pub left_column_width: usize,
pub has_accelerator: bool
pub has_accelerator: bool,
}

impl DisplayParams {
Expand Down
57 changes: 34 additions & 23 deletions libcli/src/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,12 @@ pub fn profile(

let plan = TypedSimplePlan::new_with_options(model.clone(), plan_options)?;
let mut state = TypedSimpleState::new(Arc::new(plan))?;
let start = crate::time::now();
let mut time_accounted_by_inner_nodes = Duration::default();
while iters < bench_limits.max_loops && start.elapsed() < bench_limits.max_time {
if !is_metal {

let entire;
if !is_metal {
let start = crate::time::now();
let mut time_accounted_by_inner_nodes = Duration::default();
while iters < bench_limits.max_loops && start.elapsed() < bench_limits.max_time {
rec_profiler(
&mut state,
dg,
Expand All @@ -82,15 +84,32 @@ pub fn profile(
&mut time_accounted_by_inner_nodes,
folded,
)?;
} else {
#[cfg(any(target_os = "macos", target_os = "ios"))]
{

iters += 1;
}

entire = start.elapsed() - time_accounted_by_inner_nodes;
} else {
#[cfg(any(target_os = "macos", target_os = "ios"))]
{
let session_handler = tract_metal::MetalSessionHandler::from_plan(
state.plan(),
&state.session_state.resolved_symbols,
)?;
session_handler.before_plan_eval(&mut state.session_state)?;

let start = crate::time::now();
while iters < bench_limits.max_loops && start.elapsed() < bench_limits.max_time {
rec_profiler_metal(&mut state, dg, inputs, &prefix)?;

iters += 1;
}

entire = start.elapsed();
session_handler.after_plan_eval(&mut state.session_state)?;
}
iters += 1;
}
let entire = start.elapsed() - time_accounted_by_inner_nodes;

info!("Running {} iterations max. for each node.", bench_limits.max_loops);
info!("Running for {} ms max. for each node.", bench_limits.max_time.as_millis());

Expand All @@ -107,7 +126,8 @@ pub fn profile(
}
let max = dg.tags.values().filter_map(|t| t.profile).max().unwrap();
let sum = dg.tags.values().filter_map(|t| t.profile).sum::<Duration>();
dg.profile_summary = Some(ProfileSummary { max, sum, entire, iters });
let accel_sum = dg.tags.values().filter_map(|t| t.accelerator_profile).sum::<Duration>();
dg.profile_summary = Some(ProfileSummary { max, sum, accel_sum, entire, iters });
Ok(())
}

Expand All @@ -118,17 +138,11 @@ pub fn rec_profiler_metal(
inputs: &TVec<TValue>,
prefix: &[(usize, String)],
) -> TractResult<TVec<TValue>> {
let result = tract_metal::METAL_CONTEXT.with_borrow(|ctxt| {
let session_handler = tract_metal::MetalSessionHandler::from_plan(
state.plan(),
&state.session_state.resolved_symbols,
)?;
session_handler.before_plan_eval(&mut state.session_state)?;

tract_metal::METAL_CONTEXT.with_borrow(|ctxt| {
let (mut cpu_start, mut gpu_start): (u64, u64) = (0, 0);
ctxt.device().sample_timestamps(&mut cpu_start, &mut gpu_start);

let (r, profiler) = ctxt.profile(|| {
let (result, profiler) = ctxt.profile(|| {
let r = state.run_plan_with_eval(
inputs.clone(),
|session_state, mut node_state, node, input| {
Expand All @@ -153,8 +167,6 @@ pub fn rec_profiler_metal(
let (mut cpu_end, mut gpu_end): (u64, u64) = (0, 0);
ctxt.device().sample_timestamps(&mut cpu_end, &mut gpu_end);

session_handler.after_plan_eval(&mut state.session_state)?;

profiler.iter().for_each(|(node_id, duration)| {
let node_id = NodeQId(prefix.into(), *node_id);
*dg.node_mut(node_id).accelerator_profile.get_or_insert(Duration::default()) +=
Expand All @@ -163,9 +175,8 @@ pub fn rec_profiler_metal(
));
});

Ok(r)
});
result
Ok(result)
})
}

#[allow(clippy::too_many_arguments)]
Expand Down
9 changes: 8 additions & 1 deletion libcli/src/terminal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ pub fn render_summaries(
White.bold().paint(format!("{:<17}", "CPU")),
White.bold().paint(if options.has_accelerator { "Accelerator" } else { "" }),
);

for (op, (cpu_dur, accel_dur, n)) in annotations
.tags
.iter()
Expand Down Expand Up @@ -471,6 +471,13 @@ pub fn render_summaries(
"Not accounted by ops: {}",
dur_avg_ratio(summary.entire - summary.sum.min(summary.entire), summary.entire)
);

if options.has_accelerator {
println!(
"(Total CPU Op time - Total Accelerator Op time): {}",
dur_avg_ratio(summary.sum - summary.accel_sum.min(summary.sum), summary.entire)
);
}
println!("Entire network performance: {}", dur_avg(summary.entire));
}

Expand Down

0 comments on commit 27ea988

Please sign in to comment.