Skip to content

Commit

Permalink
feat(err): Wire up resolver (#25464)
Browse files Browse the repository at this point in the history
  • Loading branch information
oliverb123 authored Oct 10, 2024
1 parent a2efb87 commit 273f64a
Show file tree
Hide file tree
Showing 12 changed files with 351 additions and 117 deletions.
55 changes: 47 additions & 8 deletions rust/cymbal/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,51 @@ pub enum Error {
SqlxError(#[from] sqlx::Error),
#[error("Reqwest error: {0}")]
ReqwestError(#[from] reqwest::Error),
#[error("Not implemented error: {0}")]
NotImplementedError(String),
#[error("Lookup failed: {0}")]
LookupFailed(String),
#[error("Could not get source ref from: {0}")]
InvalidSourceRef(String),
#[error("sourcemap error: {0}")]
SourceMapError(#[from] sourcemap::Error),
#[error(transparent)]
ResolutionError(#[from] ResolutionError),
}

// These are errors that occur during frame resolution. This excludes e.g. network errors,
// which are handled by the store - this is the error type that's handed to the frame to see
// if it can still make something useful out of it.
#[derive(Debug, Error)]
pub enum ResolutionError {
#[error(transparent)]
JavaScript(#[from] JsResolveErr),
}

#[derive(Debug, Error)]
pub enum JsResolveErr {
// The frame has no source url. This might indicate it needs no further processing, who knows
#[error("No source url found")]
NoSourceUrl,
// We failed to parse a found source map
#[error("Invalid source map: {0}")]
InvalidSourceMap(#[from] sourcemap::Error),
// We found and parsed the source map, but couldn't find our frames token in it
#[error("Token not found for frame: {0}:{1}:{2}")]
TokenNotFound(String, u32, u32),
// We couldn't parse the source url of the frame
#[error("Invalid source url: {0}")]
InvalidSourceUrl(String),
// We couldn't find a sourcemap associated with the frames source url, after
// fetching the source, in either the headers or body. This might indicate
// the frame is not minified
#[error("Could not find sourcemap for source url: {0}")]
NoSourcemap(String),
// We made a request to the source URL, and got a source
// map header, but couldn't parse it to a utf8 string
#[error("Could not parse source-map header from url {0}")]
InvalidSourceMapHeader(String),
// We found a source map url, in the headers or body
// of the response from the source url, but couldn't
// parse it to a URL to actually fetch the source map
#[error("Invalid source url: {0}")]
InvalidSourceMapUrl(String),
}

impl From<JsResolveErr> for Error {
fn from(e: JsResolveErr) -> Self {
ResolutionError::JavaScript(e).into()
}
}
117 changes: 99 additions & 18 deletions rust/cymbal/src/langs/js.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
use reqwest::Url;
use serde::Deserialize;
use sourcemap::Token;

use crate::error::Error;
use crate::{
error::{Error, JsResolveErr},
types::frames::Frame,
};

// A minifed JS stack frame. Just the minimal information needed to lookup some
// sourcemap for it and produce a "real" stack frame.
Expand All @@ -13,27 +17,104 @@ pub struct RawJSFrame {
#[serde(rename = "colno")]
pub column: u32,
#[serde(rename = "filename")]
pub script_url: String,
pub source_url: Option<String>, // The url the the script the frame was in was fetched from
pub in_app: bool,
#[serde(rename = "function")]
pub fn_name: String,
}

impl RawJSFrame {
pub fn source_ref(&self) -> Result<Url, Error> {
// Frame scrupt URLs are in the form: `<protocol>://<domain>/<path>:<line>:<column>`. We
// want to strip the line and column, if they're present, and then return the rest
let to_protocol_end = self
.script_url
.find("://")
.ok_or(Error::InvalidSourceRef(self.script_url.clone()))?
+ 3;

let (protocol, rest) = self.script_url.split_at(to_protocol_end);
let to_end_of_path = rest.find(':').unwrap_or(rest.len());
let useful = protocol.len() + to_end_of_path;
let url = &self.script_url[..useful];
Url::parse(url).map_err(|_| Error::InvalidSourceRef(self.script_url.clone()))
pub fn source_ref(&self) -> Result<Url, JsResolveErr> {
// We can't resolve a frame without a source ref, and are forced
// to assume this frame is not minified
let Some(source_url) = &self.source_url else {
return Err(JsResolveErr::NoSourceUrl);
};

// We outright reject relative URLs, or ones that are not HTTP
if !source_url.starts_with("http://") && !source_url.starts_with("https://") {
return Err(JsResolveErr::InvalidSourceUrl(source_url.clone()));
}

// TODO - we assume these are always absolute urls, and maybe should handle cases where
// they aren't? We control this on the client side, and I'd prefer to enforce it here.

// These urls can have a trailing line and column number, formatted like: http://example.com/path/to/file.js:1:2.
// We need to strip that off to get the actual source url
// If the last colon is after the last slash, remove it, under the assumption that it's a column number.
// If there is no colon, or it's before the last slash, we assume the whole thing is a url,
// with no trailing line and column numbers
let last_colon = source_url.rfind(':');
let last_slash = source_url.rfind('/');
let useful = match (last_colon, last_slash) {
(Some(colon), Some(slash)) if colon > slash => colon,
_ => source_url.len(),
};

// We do this check one more time, to account for possible line number
let source_url = &source_url[..useful];
let last_colon = source_url.rfind(':');
let last_slash = source_url.rfind('/');
let useful = match (last_colon, last_slash) {
(Some(colon), Some(slash)) if colon > slash => colon,
_ => source_url.len(),
};

Url::parse(&source_url[..useful])
.map_err(|_| JsResolveErr::InvalidSourceUrl(source_url.to_string()))
}

// JS frames can only handle JS resolution errors - errors at the network level
pub fn try_handle_resolution_error(&self, e: JsResolveErr) -> Result<Frame, Error> {
// A bit naughty, but for code like this, justified I think
use JsResolveErr::{
InvalidSourceMap, InvalidSourceMapHeader, InvalidSourceMapUrl, InvalidSourceUrl,
NoSourceUrl, NoSourcemap, TokenNotFound,
};

// I break out all the cases individually here, rather than using an _ to match all,
// because I want this to stop compiling if we add new cases
Ok(match e {
NoSourceUrl => self.try_assume_unminified().ok_or(NoSourceUrl), // We assume we're not minified
NoSourcemap(u) => self.try_assume_unminified().ok_or(NoSourcemap(u)),
InvalidSourceMap(e) => Err(JsResolveErr::from(e)),
TokenNotFound(s, l, c) => Err(TokenNotFound(s, l, c)),
InvalidSourceUrl(u) => Err(InvalidSourceUrl(u)),
InvalidSourceMapHeader(u) => Err(InvalidSourceMapHeader(u)),
InvalidSourceMapUrl(u) => Err(InvalidSourceMapUrl(u)),
}?)
}

// Returns none if the frame is
fn try_assume_unminified(&self) -> Option<Frame> {
// TODO - we should include logic here that uses some kind of heuristic to determine
// if this frame is minified or not. Right now, we simply assume it isn't if this is
// being called (and all the cases where it's called are above)
Some(Frame {
mangled_name: self.fn_name.clone(),
line: Some(self.line),
column: Some(self.column),
source: self.source_url.clone(), // Maybe we have one?
in_app: self.in_app,
resolved_name: Some(self.fn_name.clone()), // This is the bit we'd want to check
lang: "javascript".to_string(),
})
}
}

impl From<(&RawJSFrame, Token<'_>)> for Frame {
fn from(src: (&RawJSFrame, Token)) -> Self {
let (raw_frame, token) = src;

Self {
mangled_name: raw_frame.fn_name.clone(),
line: Some(token.get_src_line()),
column: Some(token.get_src_col()),
source: token.get_source().map(String::from),
in_app: raw_frame.in_app,
resolved_name: token.get_name().map(String::from),
lang: "javascript".to_string(),
}
}
}

Expand All @@ -44,7 +125,7 @@ mod test {
let frame = super::RawJSFrame {
line: 1,
column: 2,
script_url: "http://example.com/path/to/file.js:1:2".to_string(),
source_url: Some("http://example.com/path/to/file.js:1:2".to_string()),
in_app: true,
fn_name: "main".to_string(),
};
Expand All @@ -57,7 +138,7 @@ mod test {
let frame = super::RawJSFrame {
line: 1,
column: 2,
script_url: "http://example.com/path/to/file.js".to_string(),
source_url: Some("http://example.com/path/to/file.js".to_string()),
in_app: true,
fn_name: "main".to_string(),
};
Expand Down
117 changes: 103 additions & 14 deletions rust/cymbal/src/resolver.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
error::Error,
error::{Error, JsResolveErr},
symbol_store::SymbolStore,
types::frames::{Frame, RawFrame},
};
Expand All @@ -8,35 +8,124 @@ use sourcemap::SourceMap;

#[async_trait]
pub trait Resolver: Send + Sync + 'static {
// Resolvers work on a per-frame basis, so we can be clever about the order
// in which we resolve them. We also force any resolver to handle all frame
// types
// TODO - I'm not totally convinced this resolver interface shouldn't enforce
// some kind of batch-style use (take a symbol set ref and a list of frame
// explicitly? I'm not sure)... right now this interface is prone to "holding it
// wrong" type performance issues. Resolvers should maybe even encode a "submit"
// style interface, where users are expected to send them work in a stream and
// asynchronously get back results - which would permit internal batching etc.
// Idk, that's a lot of complexity. I'm a lot less happy with this interface
// than I am with the store one, though.
async fn resolve(&self, raw: RawFrame, team_id: i32) -> Result<Frame, Error>;
}

pub struct ResolverImpl {
pub store: Box<dyn SymbolStore>,
}

#[async_trait]
impl Resolver for ResolverImpl {
async fn resolve(&self, raw: RawFrame, team_id: i32) -> Result<Frame, Error> {
impl ResolverImpl {
pub fn new(store: Box<dyn SymbolStore>) -> Self {
Self { store }
}

async fn resolve_impl(&self, raw: &RawFrame, team_id: i32) -> Result<Frame, Error> {
let source_ref = raw.source_ref()?;
let source = self.store.fetch(team_id, source_ref).await?;

// Since we only support js right now, this is all we do. Everything from here
// is js specific
let RawFrame::JavaScript(raw) = raw;
let sm = SourceMap::from_reader(source.as_slice())?;
let token = sm
.lookup_token(raw.line, raw.column)
.ok_or_else(|| Error::LookupFailed(String::from("Token not found")))?;
let sm = SourceMap::from_reader(source.as_slice()).map_err(JsResolveErr::from)?;
let token = sm.lookup_token(raw.line, raw.column).ok_or_else(|| {
// Unwrap is safe because, if this frame couldn't give us a source ref, we'd know already
JsResolveErr::TokenNotFound(raw.source_ref().unwrap().to_string(), raw.line, raw.column)
})?;

Ok((raw, token).into())
}
}

impl ResolverImpl {
pub fn new(store: Box<dyn SymbolStore>) -> Self {
Self { store }
#[async_trait]
impl Resolver for ResolverImpl {
async fn resolve(&self, raw: RawFrame, team_id: i32) -> Result<Frame, Error> {
match self.resolve_impl(&raw, team_id).await {
Ok(frame) => Ok(frame),
Err(e) => raw.try_handle_resolve_error(e),
}
}
}

#[cfg(test)]
mod test {
use common_types::ClickHouseEvent;
use httpmock::MockServer;

use crate::{
config::Config,
resolver::Resolver,
symbol_store::{basic::BasicStore, caching::CachingStore},
types::{frames::RawFrame, ErrProps},
};

use super::ResolverImpl;

const CHUNK_PATH: &str = "/static/chunk-PGUQKT6S.js";
const MINIFIED: &[u8] = include_bytes!("../tests/static/chunk-PGUQKT6S.js");
const MAP: &[u8] = include_bytes!("../tests/static/chunk-PGUQKT6S.js.map");
const EXAMPLE_EXCEPTION: &str = include_str!("../tests/static/raw_ch_exception.json");

#[tokio::test]
async fn end_to_end_resolver_test() {
let server = MockServer::start();

let source_mock = server.mock(|when, then| {
when.method("GET").path(CHUNK_PATH);
then.status(200).body(MINIFIED);
});

let map_mock = server.mock(|when, then| {
// Our minified example source uses a relative URL, formatted like this
when.method("GET").path(format!("{}.map", CHUNK_PATH));
then.status(200).body(MAP);
});

let exception: ClickHouseEvent = serde_json::from_str(EXAMPLE_EXCEPTION).unwrap();
let props: ErrProps = serde_json::from_str(&exception.properties.unwrap()).unwrap();
let mut test_stack: Vec<RawFrame> =
serde_json::from_str(props.exception_stack_trace_raw.as_ref().unwrap()).unwrap();

// We're going to pretend out stack consists exclusively of JS frames whose source
// we have locally
test_stack.retain(|s| {
let RawFrame::JavaScript(s) = s;
s.source_url.as_ref().unwrap().contains(CHUNK_PATH)
});

for frame in &mut test_stack {
let RawFrame::JavaScript(frame) = frame;
// Our test data contains our /actual/ source urls - we need to swap that to localhost
// When I first wrote this test, I forgot to do this, and it took me a while to figure out
// why the test was passing before I'd even set up the mockserver - which was pretty cool, tbh
frame.source_url = Some(server.url(CHUNK_PATH).to_string());
}

let mut config = Config::init_with_defaults().unwrap();
config.allow_internal_ips = true; // We're hitting localhost for the tests

let store = BasicStore::new(&config).unwrap();
// We're even going to assert we only hit the mockserver once for the source and sourcemap
let store = CachingStore::new(Box::new(store), 10_000_000);

let resolver = ResolverImpl::new(Box::new(store));

let mut resolved_frames = Vec::new();
for frame in test_stack {
let resolved = resolver.resolve(frame, 1).await.unwrap();
resolved_frames.push(resolved);
}

// The use of the caching layer is tested here - we should only have hit the server once
source_mock.assert_hits(1);
map_mock.assert_hits(1);
}
}
Loading

0 comments on commit 273f64a

Please sign in to comment.