Skip to content

Commit

Permalink
Fix cfg blocks
Browse files (browse the repository at this point in the history)
  • Loading branch information
LouisChourakiSonos committed Dec 3, 2024
1 parent b5da504 commit 4a15f52
Showing 1 changed file with 50 additions and 57 deletions.
107 changes: 50 additions & 57 deletions libcli/src/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,6 @@ use tract_core::num_traits::Zero;
use tract_core::ops::scan::State;
use tract_core::ops::submodel::TypedModelOpState;

#[cfg(any(target_os = "macos", target_os = "ios"))]
{
use tract_metal::utils::rescale_gpu_duration;
use tract_metal::MetalSessionHandler;
}

use crate::annotations::*;
use crate::model::Model;
use crate::tensor::make_inputs_for_model;
Expand Down Expand Up @@ -118,61 +112,60 @@ pub fn profile(
}

#[cfg(any(target_os = "macos", target_os = "ios"))]
{
pub fn rec_profiler_metal(
state: &mut TypedSimpleState<TypedModel, Arc<TypedSimplePlan<TypedModel>>>,
dg: &mut Annotations,
inputs: &TVec<TValue>,
prefix: &[(usize, String)],
) -> TractResult<TVec<TValue>> {

let result = tract_metal::METAL_CONTEXT.with_borrow(|ctxt| {
let session_handler =
MetalSessionHandler::from_plan(state.plan(), &state.session_state.resolved_symbols)?;
session_handler.before_plan_eval(&mut state.session_state)?;

let (mut cpu_start, mut gpu_start): (u64, u64) = (0, 0);
ctxt.device().sample_timestamps(&mut cpu_start, &mut gpu_start);

let (r, profiler) = ctxt.profile(|| {
let r = state.run_plan_with_eval(
inputs.clone(),
|session_state, mut node_state, node, input| {
// Profile node
let start = crate::time::now();
let res = tract_core::plan::eval(
session_state,
node_state.as_deref_mut(),
node,
input.clone(),
);
let elapsed = start.elapsed();
let node_id = NodeQId(prefix.into(), node.id);
*dg.node_mut(node_id).profile.get_or_insert(Duration::default()) += elapsed;

res
},
)?;
Ok(r)
})?;

let (mut cpu_end, mut gpu_end): (u64, u64) = (0, 0);
ctxt.device().sample_timestamps(&mut cpu_end, &mut gpu_end);
/// Runs the plan held by `state` inside the Metal context and records per-node timings in `dg`.
///
/// Two kinds of timing are accumulated:
/// * the elapsed time measured around `tract_core::plan::eval` for each node, added to that
///   node's `profile` annotation;
/// * each `(node_id, duration)` entry yielded by the profiler returned from `ctxt.profile`,
///   passed through `rescale_gpu_duration` together with the CPU/GPU timestamps sampled before
///   and after the run, and added to that node's `accelerator_profile` annotation.
///
/// Parameters:
/// * `state` - plan state that is run; its `session_state` is handed to the Metal session
///   handler before and after the evaluation.
/// * `dg` - annotations that receive the timings.
/// * `inputs` - input values; cloned and passed to `run_plan_with_eval`.
/// * `prefix` - combined with each node id to build the `NodeQId` used to look up annotations.
///
/// Returns the value produced by `run_plan_with_eval`. Any error raised by the session handler,
/// the profiled run, or the plan evaluation is propagated with `?`.
///
/// NOTE(review): this listing is a rendered diff. From the end-timestamp sampling onward,
/// several statements appear twice, so the tail below is not the function as compiled —
/// confirm against the actual file.
pub fn rec_profiler_metal(
state: &mut TypedSimpleState<TypedModel, Arc<TypedSimplePlan<TypedModel>>>,
dg: &mut Annotations,
inputs: &TVec<TValue>,
prefix: &[(usize, String)],
) -> TractResult<TVec<TValue>> {
// Everything runs inside the closure given to `METAL_CONTEXT.with_borrow`; `ctxt` is the
// borrowed context it provides.
let result = tract_metal::METAL_CONTEXT.with_borrow(|ctxt| {
// Build the session handler from the plan and the already-resolved symbols, then let it
// prepare the session state before evaluation.
let session_handler = tract_metal::MetalSessionHandler::from_plan(
state.plan(),
&state.session_state.resolved_symbols,
)?;
session_handler.before_plan_eval(&mut state.session_state)?;

// Sample a CPU/GPU timestamp pair before the run; it is later passed to
// `rescale_gpu_duration` with the pair sampled after the run.
let (mut cpu_start, mut gpu_start): (u64, u64) = (0, 0);
ctxt.device().sample_timestamps(&mut cpu_start, &mut gpu_start);

// `ctxt.profile` returns the closure's value together with a profiler that is iterated
// below as `(node_id, duration)` entries.
let (r, profiler) = ctxt.profile(|| {
let r = state.run_plan_with_eval(
inputs.clone(),
|session_state, mut node_state, node, input| {
// Profile node: time this single node's evaluation using `crate::time::now()`.
let start = crate::time::now();
let res = tract_core::plan::eval(
session_state,
node_state.as_deref_mut(),
node,
input.clone(),
);
let elapsed = start.elapsed();
let node_id = NodeQId(prefix.into(), node.id);
// Start from a default `Duration` when the node has no timing yet, then accumulate.
*dg.node_mut(node_id).profile.get_or_insert(Duration::default()) += elapsed;

// The evaluation result (success or error) is returned unchanged.
res
},
)?;
Ok(r)
})?;

session_handler.after_plan_eval(&mut state.session_state)?;
// Sample the matching CPU/GPU timestamp pair after the run.
let (mut cpu_end, mut gpu_end): (u64, u64) = (0, 0);
ctxt.device().sample_timestamps(&mut cpu_end, &mut gpu_end);

// NOTE(review): from here to the end the statements repeat (two accumulation loops, two
// `Ok(r)`, two `result`, a second `after_plan_eval`). This looks like pre- and post-commit
// diff lines interleaved; the loop calling the unqualified `rescale_gpu_duration` is
// presumably the removed one — TODO confirm against the actual file.
profiler.iter().for_each(|(node_id, duration)| {
let node_id = NodeQId(prefix.into(), *node_id);
*dg.node_mut(node_id).accelerator_profile.get_or_insert(Duration::default()) +=
Duration::from_nanos(rescale_gpu_duration(
*duration, cpu_start, cpu_end, gpu_start, gpu_end,
));
});
session_handler.after_plan_eval(&mut state.session_state)?;

Ok(r)
// Add each profiler entry to the node's `accelerator_profile`, after rescaling it with the
// start/end timestamp pairs. The rescaled value is treated as nanoseconds
// (`Duration::from_nanos`); presumably this maps GPU time onto the CPU timeline — verify in
// `tract_metal::utils`.
profiler.iter().for_each(|(node_id, duration)| {
let node_id = NodeQId(prefix.into(), *node_id);
*dg.node_mut(node_id).accelerator_profile.get_or_insert(Duration::default()) +=
Duration::from_nanos(tract_metal::utils::rescale_gpu_duration(
*duration, cpu_start, cpu_end, gpu_start, gpu_end,
));
});
result
}

Ok(r)
});
result
}

#[allow(clippy::too_many_arguments)]
Expand Down

0 comments on commit 4a15f52

Please sign in to comment.