From 101d46cca676c320188e8d13e315cd640b346a8a Mon Sep 17 00:00:00 2001 From: Oliver Browne Date: Wed, 30 Oct 2024 15:52:26 +0200 Subject: [PATCH] fix(err): cluster frames before resolution (#25886) --- rust/common/metrics/src/lib.rs | 3 ++- rust/cymbal/src/langs/js.rs | 2 +- rust/cymbal/src/main.rs | 41 +++++++++++++++++++++++--------- rust/cymbal/src/metric_consts.rs | 1 + rust/cymbal/src/types/frames.rs | 5 ++++ 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/rust/common/metrics/src/lib.rs b/rust/common/metrics/src/lib.rs index 9e82e98cc004f..967188db29fc3 100644 --- a/rust/common/metrics/src/lib.rs +++ b/rust/common/metrics/src/lib.rs @@ -46,7 +46,8 @@ pub fn setup_metrics_routes(router: Router) -> Router { pub fn setup_metrics_recorder() -> PrometheusHandle { const BUCKETS: &[f64] = &[ - 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 50.0, 100.0, 250.0, + 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 50.0, 100.0, 250.0, 500.0, + 1000.0, 2000.0, 5000.0, 10000.0, ]; PrometheusBuilder::new() diff --git a/rust/cymbal/src/langs/js.rs b/rust/cymbal/src/langs/js.rs index 89deafdc46479..e8921d614518d 100644 --- a/rust/cymbal/src/langs/js.rs +++ b/rust/cymbal/src/langs/js.rs @@ -62,7 +62,7 @@ impl RawJSFrame { (self, e).into() } - fn source_url(&self) -> Result { + pub fn source_url(&self) -> Result { // We can't resolve a frame without a source ref, and are forced // to assume this frame is not minified let Some(source_url) = &self.source_url else { diff --git a/rust/cymbal/src/main.rs b/rust/cymbal/src/main.rs index 9ea6c02b4a065..8087bff817872 100644 --- a/rust/cymbal/src/main.rs +++ b/rust/cymbal/src/main.rs @@ -1,4 +1,4 @@ -use std::{future::ready, sync::Arc}; +use std::{collections::HashMap, future::ready, sync::Arc}; use axum::{routing::get, Router}; use common_kafka::kafka_consumer::RecvErr; @@ -8,7 +8,10 @@ use cymbal::{ app_context::AppContext, config::Config, error::Error, - metric_consts::{ERRORS, EVENT_RECEIVED, MAIN_LOOP_TIME, PER_STACK_TIME, STACK_PROCESSED}, + metric_consts::{ + ERRORS, EVENT_RECEIVED, MAIN_LOOP_TIME, PER_FRAME_GROUP_TIME, PER_STACK_TIME, + STACK_PROCESSED, + }, types::{frames::RawFrame, ErrProps}, }; use envconfig::Envconfig; @@ -119,21 +122,37 @@ async fn main() -> Result<(), Error> { let stack_trace: &Vec = &trace.frames; let per_stack = common_metrics::timing_guard(PER_STACK_TIME, &[]); - let mut frames = Vec::with_capacity(stack_trace.len()); + + // Cluster the frames by symbol set + let mut groups = HashMap::new(); for frame in stack_trace { - match frame.resolve(event.team_id, &context.catalog).await { - Ok(r) => frames.push(r), - Err(err) => { - metrics::counter!(ERRORS, "cause" => "frame_not_parsable").increment(1); - error!("Error parsing stack frame: {:?}", err); - continue; + let group = groups + .entry(frame.symbol_set_group_key()) + .or_insert_with(Vec::new); + group.push(frame.clone()); + } + + let team_id = event.team_id; + let mut results = Vec::with_capacity(stack_trace.len()); + for (_, frames) in groups.into_iter() { + context.worker_liveness.report_healthy().await; // TODO - we shouldn't need to do this, but we do for now. + let mut any_success = false; + let per_frame_group = common_metrics::timing_guard(PER_FRAME_GROUP_TIME, &[]); + for frame in frames { + results.push(frame.resolve(team_id, &context.catalog).await); + if results.last().unwrap().is_ok() { + any_success = true; } - }; + } + per_frame_group + .label("resolved_any", if any_success { "true" } else { "false" }) + .fin(); } + per_stack .label( "resolved_any", - if frames.is_empty() { "true" } else { "false" }, + if results.is_empty() { "true" } else { "false" }, ) .fin(); whole_loop.label("had_frame", "true").fin(); diff --git a/rust/cymbal/src/metric_consts.rs b/rust/cymbal/src/metric_consts.rs index 093636eda9d26..aa46aa4719f6c 100644 --- a/rust/cymbal/src/metric_consts.rs +++ b/rust/cymbal/src/metric_consts.rs @@ -15,3 +15,4 @@ pub const STORE_CACHE_EVICTIONS: &str = "cymbal_store_cache_evictions"; pub const MAIN_LOOP_TIME: &str = "cymbal_main_loop_time"; pub const PER_FRAME_TIME: &str = "cymbal_per_frame_time"; pub const PER_STACK_TIME: &str = "cymbal_per_stack_time"; +pub const PER_FRAME_GROUP_TIME: &str = "cymbal_per_frame_group_time"; diff --git a/rust/cymbal/src/types/frames.rs b/rust/cymbal/src/types/frames.rs index 816ef78ba3782..f14840cfb0073 100644 --- a/rust/cymbal/src/types/frames.rs +++ b/rust/cymbal/src/types/frames.rs @@ -27,6 +27,11 @@ impl RawFrame { res } + + pub fn symbol_set_group_key(&self) -> String { + let RawFrame::JavaScript(raw) = self; + raw.source_url().map(String::from).unwrap_or_default() + } } // We emit a single, unified representation of a frame, which is what we pass on to users.