diff --git a/fxprof-processed-profile/src/markers.rs b/fxprof-processed-profile/src/markers.rs index 633744eb..c69f881e 100644 --- a/fxprof-processed-profile/src/markers.rs +++ b/fxprof-processed-profile/src/markers.rs @@ -6,10 +6,9 @@ use serde::ser::{SerializeMap, SerializeSeq}; use serde::Serialize; use serde_derive::Serialize; -use crate::{CategoryHandle, Profile}; - use super::profile::StringHandle; use super::timestamp::Timestamp; +use crate::{CategoryHandle, Profile}; /// The handle for a marker. Returned from [`Profile::add_marker`]. /// diff --git a/samply/src/linux_shared/kernel_symbols.rs b/samply/src/linux_shared/kernel_symbols.rs index 63ff89b6..baabf9de 100644 --- a/samply/src/linux_shared/kernel_symbols.rs +++ b/samply/src/linux_shared/kernel_symbols.rs @@ -1,6 +1,6 @@ -use std::path::Path; +use std::fmt::Debug; +use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::{fmt::Debug, path::PathBuf}; use fxprof_processed_profile::{Symbol, SymbolTable}; use object::{elf, read, NativeEndian, Object}; diff --git a/samply/src/mac/codesign_setup.rs b/samply/src/mac/codesign_setup.rs index 6af3b09b..acc35a6b 100644 --- a/samply/src/mac/codesign_setup.rs +++ b/samply/src/mac/codesign_setup.rs @@ -1,4 +1,5 @@ -use std::{env, io::Write}; +use std::env; +use std::io::Write; const ENTITLEMENTS_XML: &str = r#" diff --git a/samply/src/mac/process_launcher.rs b/samply/src/mac/process_launcher.rs index 65823817..8fc56725 100644 --- a/samply/src/mac/process_launcher.rs +++ b/samply/src/mac/process_launcher.rs @@ -13,10 +13,9 @@ use mach::task::{task_resume, task_suspend}; use mach::traps::task_for_pid; use tempfile::tempdir; -use crate::shared::ctrl_c::CtrlC; - pub use super::mach_ipc::{mach_port_t, MachError, OsIpcSender}; use super::mach_ipc::{mach_task_self, BlockingMode, OsIpcMultiShotServer, MACH_PORT_NULL}; +use crate::shared::ctrl_c::CtrlC; pub trait RootTaskRunner { fn run_root_task(&mut self) -> Result; diff --git a/samply/src/server.rs b/samply/src/server.rs index 81a3ca78..4cace6a4 100644 --- a/samply/src/server.rs +++ b/samply/src/server.rs @@ -20,7 +20,7 @@ use rand::RngCore; use tokio::net::TcpListener; use tokio_util::io::ReaderStream; use wholesym::debugid::DebugId; -use wholesym::{LibraryInfo, SymbolManager, SymbolManagerConfig}; +use wholesym::{LibraryInfo, SymbolManager, SymbolManagerConfig, VerboseSymbolManagerObserver}; use crate::name::SAMPLY_NAME; use crate::shared; @@ -65,13 +65,12 @@ impl PortSelection { } } -fn create_symbol_manager_config(symbol_props: SymbolProps, verbose: bool) -> SymbolManagerConfig { +fn create_symbol_manager_config(symbol_props: SymbolProps) -> SymbolManagerConfig { let _config_dir = AppDirs::new(Some(SAMPLY_NAME), true).map(|dirs| dirs.config_dir); let cache_base_dir = AppDirs::new(Some(SAMPLY_NAME), false).map(|dirs| dirs.cache_dir); let cache_base_dir = cache_base_dir.as_deref(); let mut config = SymbolManagerConfig::new() - .verbose(verbose) .respect_nt_symbol_path(true) .use_debuginfod(std::env::var("SAMPLY_USE_DEBUGINFOD").is_ok()) .use_spotlight(true); @@ -164,8 +163,11 @@ async fn start_server( let template_values = Arc::new(template_values); - let config = create_symbol_manager_config(symbol_props, server_props.verbose); + let config = create_symbol_manager_config(symbol_props); let mut symbol_manager = SymbolManager::with_config(config); + if server_props.verbose { + symbol_manager.set_observer(Some(Arc::new(VerboseSymbolManagerObserver::new()))); + } for lib_info in libinfo_map.into_values() { symbol_manager.add_known_library(lib_info); } diff --git a/samply/src/shared/symbol_precog.rs b/samply/src/shared/symbol_precog.rs index 0b6407e0..9220d8e8 100644 --- a/samply/src/shared/symbol_precog.rs +++ b/samply/src/shared/symbol_precog.rs @@ -1,13 +1,13 @@ +use std::collections::HashMap; use std::fs::File; use std::io::BufWriter; +use std::path::Path; use std::str::FromStr; use std::sync::Arc; -use std::{collections::HashMap, path::Path}; use debugid::DebugId; -use serde::{ - ser::SerializeMap, ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer, -}; +use serde::ser::{SerializeMap, SerializeSeq}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_derive::{Deserialize, Serialize}; use serde_json::to_writer; use wholesym::SourceFilePath; diff --git a/samply/src/windows/coreclr.rs b/samply/src/windows/coreclr.rs index 13b56c34..5383381f 100644 --- a/samply/src/windows/coreclr.rs +++ b/samply/src/windows/coreclr.rs @@ -1,21 +1,19 @@ -use std::{collections::HashMap, convert::TryInto, fmt::Display}; +use std::collections::HashMap; +use std::convert::TryInto; +use std::fmt::Display; use bitflags::bitflags; +use etw_reader::parser::{Parser, TryParse}; +use etw_reader::schema::TypedEvent; +use etw_reader::{self, event_properties_to_string}; use fxprof_processed_profile::*; use num_derive::FromPrimitive; use num_traits::FromPrimitive; -use etw_reader::{self, schema::TypedEvent}; -use etw_reader::{ - event_properties_to_string, - parser::{Parser, TryParse}, -}; - +use super::elevated_helper::ElevatedRecordingProps; use crate::shared::recording_props::{CoreClrProfileProps, ProfileCreationProps}; use crate::windows::profile_context::{KnownCategory, ProfileContext}; -use super::elevated_helper::ElevatedRecordingProps; - struct SavedMarkerInfo { start_timestamp_raw: u64, name: String, diff --git a/samply/src/windows/elevated_helper.rs b/samply/src/windows/elevated_helper.rs index 0a2c5c8e..c512e888 100644 --- a/samply/src/windows/elevated_helper.rs +++ b/samply/src/windows/elevated_helper.rs @@ -3,14 +3,13 @@ use std::path::{Path, PathBuf}; use serde_derive::{Deserialize, Serialize}; -use crate::shared::recording_props::{ - CoreClrProfileProps, ProfileCreationProps, RecordingMode, RecordingProps, -}; - use super::utility_process::{ run_child, UtilityProcess, UtilityProcessChild, UtilityProcessParent, UtilityProcessSession, }; use super::xperf::Xperf; +use crate::shared::recording_props::{ + CoreClrProfileProps, ProfileCreationProps, RecordingMode, RecordingProps, +}; #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "t", content = "c")] diff --git a/wholesym-addr2line/src/main.rs b/wholesym-addr2line/src/main.rs index fa9cd8f9..f5003351 100644 --- a/wholesym-addr2line/src/main.rs +++ b/wholesym-addr2line/src/main.rs @@ -1,10 +1,11 @@ use std::borrow::Cow; use std::io::{BufRead, Lines, StdinLock, Write}; use std::path::{Path, PathBuf}; +use std::sync::Arc; use clap::parser::ValuesRef; use clap::{value_parser, Arg, ArgAction, Command}; -use wholesym::LookupAddress; +use wholesym::{LookupAddress, VerboseSymbolManagerObserver}; fn parse_uint_from_hex_string(string: &str) -> u64 { if string.len() > 2 && string.starts_with("0x") { @@ -153,9 +154,9 @@ async fn main() -> Result<(), Box> { let config = wholesym::SymbolManagerConfig::new() .use_spotlight(true) - .verbose(true) .respect_nt_symbol_path(true); - let symbol_manager = wholesym::SymbolManager::with_config(config); + let mut symbol_manager = wholesym::SymbolManager::with_config(config); + symbol_manager.set_observer(Some(Arc::new(VerboseSymbolManagerObserver::new()))); let symbol_map = symbol_manager .load_symbol_map_for_binary_at_path(path, None) .await?; diff --git a/wholesym/src/breakpad.rs b/wholesym/src/breakpad.rs index 611ca468..1f5e49c6 100644 --- a/wholesym/src/breakpad.rs +++ b/wholesym/src/breakpad.rs @@ -1,63 +1,14 @@ -use std::{ - path::{Path, PathBuf}, - sync::{atomic::AtomicU64, Arc}, - time::{Duration, Instant}, -}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; -use futures_util::AsyncReadExt as _; use samply_symbols::{BreakpadIndex, BreakpadIndexParser, BreakpadParseError}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use crate::{ - download::response_to_uncompressed_stream_with_progress, - file_creation::{create_file_cleanly, CleanFileCreationError}, -}; +use crate::downloader::{Downloader, DownloaderObserver, FileDownloadOutcome}; +use crate::file_creation::{create_file_cleanly, CleanFileCreationError}; +use crate::DownloadError; -/// The error type used in the observer notification [`BreakpadSymbolObserver::on_download_failed`]. -#[derive(thiserror::Error, Debug)] -pub enum DownloadError { - /// Creating the reqwest Client failed. - #[error("Creating the client failed: {0}")] - ClientCreationFailed(String), - - /// Opening the request failed. - #[error("Opening the request failed: {0}")] - OpenFailed(Box), - - /// The download timed out. - #[error("The download timed out")] - Timeout, - - /// The server returned a non-success status code. - #[error("The server returned status code {0}")] - StatusError(http::StatusCode), - - /// The destination directory could not be created. - #[error("The destination directory could not be created")] - CouldNotCreateDestinationDirectory, - - /// The response used an unexpected Content-Encoding. - #[error("The response used an unexpected Content-Encoding: {0}")] - UnexpectedContentEncoding(String), - - /// An I/O error occurred in the middle of downloading. - #[error("Error during downloading: {0}")] - ErrorDuringDownloading(std::io::Error), - - /// Error while writing the downloaded file. - #[error("Error while writing the downloaded file: {0}")] - ErrorWhileWritingDownloadedFile(std::io::Error), - - /// Redirect-related error. - #[error("Redirect-related error")] - Redirect(Box), - - /// Other error. - #[error("Other error: {0}")] - Other(Box), -} - -/// The error type used in the observer notification [`BreakpadSymbolObserver::on_symindex_generation_failed`]. +/// The error type used in the observer notification [`DownloaderObserver::on_symindex_generation_failed`]. #[derive(thiserror::Error, Debug)] pub enum SymindexGenerationError { /// No cache directory for breakpad symindex files has been configured. @@ -85,111 +36,6 @@ pub enum SymindexGenerationError { Other(Box), } -#[cfg(test)] -#[test] -fn test_download_error_is_sync() { - fn assert_sync() {} - assert_sync::(); -} - -impl From for DownloadError { - fn from(e: reqwest::Error) -> Self { - if e.is_status() { - DownloadError::StatusError(e.status().unwrap()) - } else if e.is_request() { - DownloadError::OpenFailed(e.into()) - } else if e.is_redirect() { - DownloadError::Redirect(e.into()) - } else if e.is_timeout() { - DownloadError::Timeout - } else { - DownloadError::Other(e.into()) - } - } -} - -/// A trait for observing the behavior of a `BreakpadSymbolDownloader`. -/// This can be used for logging, displaying progress bars, expiring cached files, etc. -pub trait BreakpadSymbolObserver: Send + Sync + 'static { - /// Called when a new download is about to start, before the connection is established. - /// - /// The download ID is unique for each download. - /// - /// For each download ID, we guarantee that exactly one of the following methods - /// will be called at the end of the download: `on_download_completed`, - /// `on_download_failed`, or `on_download_canceled`. - fn on_new_download_before_connect(&self, download_id: u64, url: &str); - - /// Called once the connection has been established and HTTP headers - /// with a success status have arrived. - fn on_download_started(&self, download_id: u64); - - /// Called frequently during the download, whenever a new chunk has been read. - /// - /// If the HTTP response is gzip-compressed, the number of bytes can refer to - /// either the compressed or the uncompressed bytes - but it'll be consistent: - /// Either both `bytes_so_far` and `total_bytes` refer to the compressed sizes, - /// or both refer to the uncompressed sizes. - /// - /// If `total_bytes` is `None`, the total size is unknown. - fn on_download_progress(&self, download_id: u64, bytes_so_far: u64, total_bytes: Option); - - /// Called when the download has completed successfully. - /// - /// Mutually exclusive with `on_download_failed` and `on_download_canceled` for a - /// given download ID. - fn on_download_completed( - &self, - download_id: u64, - uncompressed_size_in_bytes: u64, - time_until_headers: Duration, - time_until_completed: Duration, - ); - - /// Called when the download has failed. - /// - /// This is quite common; the most common reason is [`DownloadError::StatusError`] - /// with [`StatusCode::NOT_FOUND`](http::StatusCode::NOT_FOUND), for files which - /// are not available on the server. - /// - /// Mutually exclusive with `on_download_completed` and `on_download_canceled` for a - /// given download ID. - fn on_download_failed(&self, download_id: u64, reason: DownloadError); - - /// Called when the download has been canceled. - /// - /// This does not indicate an error. We commonly attempt to download a file from - /// multiple sources simultaneously, and cancel other downloads once one has succeeded. - /// - /// This function is also called if the user cancels the download by dropping the future - /// returned from [`BreakpadSymbolDownloader::get_file`]. - /// - /// Mutually exclusive with `on_download_completed` and `on_download_failed` for a - /// given download ID. - fn on_download_canceled(&self, download_id: u64); - - /// Called when a file has been created, for example because it was downloaded from - /// a server, copied from a different cache directory, or extracted from a compressed - /// file. - fn on_file_created(&self, path: &Path, size_in_bytes: u64); - - /// Called when a file from the cache has been used to service a [`BreakpadSymbolDownloader::get_file`] call. - /// - /// This is only called for pre-existing files and not for newly-created files - newly-created - /// files only trigger a call to `on_file_created`. - /// - /// Useful to guide expiration decisions. - fn on_file_accessed(&self, path: &Path); - - /// Called when we were looking for a file in the cache, and it wasn't there. Used for - /// debug logging. - /// - /// Also called if checking for file existence fails for any other reason. - fn on_file_missed(&self, path: &Path); -} - -static NEXT_DOWNLOAD_ID: AtomicU64 = AtomicU64::new(0); - pub struct BreakpadSymbolDownloader { inner: Arc, } @@ -199,28 +45,14 @@ impl BreakpadSymbolDownloader { breakpad_directories_readonly: Vec, breakpad_servers: Vec<(String, PathBuf)>, breakpad_symindex_cache_dir: Option, + downloader: Option>, ) -> Self { - let builder = reqwest::Client::builder(); - - // Turn off HTTP 2, in order to work around https://github.com/seanmonstar/reqwest/issues/1761 . - let builder = builder.http1_only(); - - // Turn off automatic decompression because it doesn't allow us to compute - // download progress percentages: we'd only know the decompressed current - // size and the compressed total size. - // Instead, we do the streaming decompression manually, see download.rs. - let builder = builder.no_gzip().no_brotli().no_deflate(); - - // Create the client. - // TODO: Add timeouts, user agent, maybe other settings - let reqwest_client = builder.build(); - let inner = BreakpadSymbolDownloaderInner { breakpad_directories_readonly, breakpad_servers, breakpad_symindex_cache_dir, observer: None, - reqwest_client, + downloader: downloader.unwrap_or_default(), }; Self { inner: Arc::new(inner), @@ -232,8 +64,8 @@ impl BreakpadSymbolDownloader { /// The observer can be used for logging, displaying progress bars, informing /// automatic expiration of cached files, and so on. /// - /// See the [`BreakpadSymbolObserver`] trait for more information. - pub fn set_observer(&mut self, observer: Option>) { + /// See the [`DownloaderObserver`] trait for more information. + pub fn set_observer(&mut self, observer: Option>) { Arc::get_mut(&mut self.inner).unwrap().observer = observer; } @@ -265,13 +97,18 @@ struct BreakpadSymbolDownloaderInner { breakpad_directories_readonly: Vec, breakpad_servers: Vec<(String, PathBuf)>, breakpad_symindex_cache_dir: Option, - observer: Option>, - reqwest_client: Result, + observer: Option>, + downloader: Arc, } impl BreakpadSymbolDownloaderInner { - pub async fn get_file(&self, rel_path: &str) -> Option { - for dir in &self.breakpad_directories_readonly { + pub async fn get_file_no_download(&self, rel_path: &str) -> Option { + let dirs: Vec<_> = self + .breakpad_directories_readonly + .iter() + .chain(self.breakpad_servers.iter().map(|(_url, dir)| dir)) + .collect(); + for dir in dirs { let path = dir.join(rel_path); if self.check_file_exists(&path).await { if let Some(observer) = self.observer.as_deref() { @@ -281,15 +118,16 @@ impl BreakpadSymbolDownloaderInner { } } + None + } + + pub async fn get_file(&self, rel_path: &str) -> Option { + if let Some(path) = self.get_file_no_download(rel_path).await { + return Some(path); + } + for (server_base_url, cache_dir) in &self.breakpad_servers { - let path = cache_dir.join(rel_path); - if self.check_file_exists(&path).await { - if let Some(observer) = self.observer.as_deref() { - observer.on_file_accessed(&path); - } - return Some(path); - } - if let Some(path) = self + if let Ok(path) = self .get_bp_sym_file_from_server(rel_path, server_base_url, cache_dir) .await { @@ -310,193 +148,38 @@ impl BreakpadSymbolDownloaderInner { file_exists } - pub async fn get_file_no_download(&self, rel_path: &str) -> Option { - let dirs: Vec<_> = self - .breakpad_directories_readonly - .iter() - .chain(self.breakpad_servers.iter().map(|(_url, dir)| dir)) - .collect(); - for dir in dirs { - let path = dir.join(rel_path); - if self.check_file_exists(&path).await { - if let Some(observer) = self.observer.as_deref() { - observer.on_file_accessed(&path); - } - return Some(path); - } - } - - None - } - - async fn prepare_download_of_file( - &self, - url: &str, - ) -> Option<(DownloadStatusReporter, reqwest::Response)> { - let download_id = NEXT_DOWNLOAD_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); - if let Some(observer) = self.observer.as_deref() { - observer.on_new_download_before_connect(download_id, url); - } - - let reporter = DownloadStatusReporter::new(download_id, self.observer.clone()); - - let reqwest_client = match self.reqwest_client.as_ref() { - Ok(client) => client, - Err(e) => { - reporter.download_failed(DownloadError::ClientCreationFailed(e.to_string())); - return None; - } - }; - - let request_builder = reqwest_client.get(url); - - // Manually specify the Accept-Encoding header. - // This would happen automatically if we hadn't turned off automatic - // decompression for this reqwest client. - let request_builder = request_builder.header("Accept-Encoding", "gzip"); - - // Send the request and wait for the headers. - let response_result = request_builder.send().await; - - // Check the HTTP status code. - let response_result = response_result.and_then(|response| response.error_for_status()); - - let response = match response_result { - Ok(response) => response, - Err(e) => { - // The request failed, most commonly due to a 404 status code. - reporter.download_failed(DownloadError::from(e)); - return None; - } - }; - - Some((reporter, response)) - } - - /// Given a relative file path and a cache directory path, concatenate the two to make - /// a destination path, and create the necessary directories so that a file can be stored - /// at the destination path. - async fn make_dest_path_and_ensure_parent_dirs( - &self, - rel_path: &str, - cache_path: &Path, - ) -> Result { - let dest_path = cache_path.join(rel_path); - if let Some(dir) = dest_path.parent() { - tokio::fs::create_dir_all(dir).await?; - } - Ok(dest_path) - } - async fn get_bp_sym_file_from_server( &self, rel_path: &str, server_base_url: &str, cache_dir: &Path, - ) -> Option { + ) -> Result { + let dest_path = cache_dir.join(rel_path); let server_base_url = server_base_url.trim_end_matches('/'); let url = format!("{server_base_url}/{rel_path}"); - let (reporter, response) = self.prepare_download_of_file(&url).await?; - - let ts_after_status = Instant::now(); - let download_id = reporter.download_id(); - if let Some(observer) = self.observer.as_deref() { - observer.on_download_started(download_id); - } - - let dest_path = match self - .make_dest_path_and_ensure_parent_dirs(rel_path, cache_dir) - .await - { - Ok(dest_path) => dest_path, - Err(_e) => { - reporter.download_failed(DownloadError::CouldNotCreateDestinationDirectory); - return None; - } - }; let observer = self.observer.clone(); - let mut stream = match response_to_uncompressed_stream_with_progress( - response, - move |bytes_so_far, total_bytes| { - if let Some(observer) = observer.as_deref() { - observer.on_download_progress(download_id, bytes_so_far, total_bytes) - } - }, - ) { - Ok(stream) => stream, - Err(crate::download::Error::UnexpectedContentEncoding(encoding)) => { - reporter.download_failed(DownloadError::UnexpectedContentEncoding(encoding)); - return None; - } - }; - - let download_result: Result< - (Option, u64), - CleanFileCreationError, - > = create_file_cleanly( - &dest_path, - |dest_file: std::fs::File| async move { - let mut dest_file = tokio::fs::File::from_std(dest_file); - let mut buf = vec![0u8; 4096]; - let mut uncompressed_size_in_bytes = 0; - let mut index_generator = BreakpadIndexParser::new(); - loop { - let count = stream.read(&mut buf).await?; - if count == 0 { - break; - } - uncompressed_size_in_bytes += count as u64; - dest_file.write_all(&buf[..count]).await?; - index_generator.consume(&buf[..count]); - } - dest_file.flush().await?; - Ok((Some(index_generator), uncompressed_size_in_bytes)) - }, - || async { - let size = std::fs::metadata(&dest_path)?.len(); - Ok((None, size)) - }, - ) - .await; - - let (index_generator, uncompressed_size_in_bytes) = match download_result { - Ok((index_generator, size)) => (index_generator, size), - Err(CleanFileCreationError::CallbackIndicatedError(e)) => { - reporter.download_failed(DownloadError::ErrorDuringDownloading(e)); - return None; - } - Err(e) => { - reporter.download_failed(DownloadError::ErrorWhileWritingDownloadedFile(e.into())); - return None; - } - }; - - let ts_after_download = Instant::now(); - reporter.download_completed( - uncompressed_size_in_bytes, - ts_after_status, - ts_after_download, - ); - - if let Some(observer) = self.observer.as_deref() { - observer.on_file_created(&dest_path, uncompressed_size_in_bytes); - } - - match index_generator { - Some(index_generator) => { + let download = self.downloader.initiate_download(&url, observer).await?; + let mut index_generator = BreakpadIndexParser::new(); + let mut consumer = |chunk: &[u8]| index_generator.consume(chunk); + let outcome = download + .download_to_file(&dest_path, Some(&mut consumer)) + .await?; + + match outcome { + FileDownloadOutcome::DidCreateNewFile => { if let Ok(index) = index_generator.finish() { if let Some(symindex_path) = self.symindex_path(rel_path) { let _ = self.write_symindex(&symindex_path, index).await; } } } - None => { + FileDownloadOutcome::FoundExistingFile => { let _ = self.ensure_symindex(&dest_path, rel_path).await; } } - Some(dest_path) + Ok(dest_path) } pub fn symindex_path(&self, rel_path: &str) -> Option { @@ -607,65 +290,3 @@ impl BreakpadSymbolDownloaderInner { .map_err(SymindexGenerationError::BreakpadParsing) } } - -/// A helper struct with a drop handler. This lets us detect when a download -/// is cancelled by dropping the future. -struct DownloadStatusReporter { - /// Set to `None` when `download_failed()` or `download_completed()` is called. - download_id: Option, - observer: Option>, - ts_before_connect: Instant, -} - -impl DownloadStatusReporter { - pub fn new(download_id: u64, observer: Option>) -> Self { - Self { - download_id: Some(download_id), - observer, - ts_before_connect: Instant::now(), - } - } - - pub fn download_id(&self) -> u64 { - self.download_id.unwrap() - } - - pub fn download_failed(mut self, e: DownloadError) { - if let (Some(download_id), Some(observer)) = (self.download_id, self.observer.as_deref()) { - observer.on_download_failed(download_id, e); - } - self.download_id = None; - // Drop self. Now the Drop handler won't do anything. - } - - pub fn download_completed( - mut self, - uncompressed_size_in_bytes: u64, - ts_after_headers: Instant, - ts_after_completed: Instant, - ) { - if let (Some(download_id), Some(observer)) = (self.download_id, self.observer.as_deref()) { - let time_until_headers = ts_after_headers.duration_since(self.ts_before_connect); - let time_until_completed = ts_after_completed.duration_since(self.ts_before_connect); - observer.on_download_completed( - download_id, - uncompressed_size_in_bytes, - time_until_headers, - time_until_completed, - ); - } - self.download_id = None; - // Drop self. Now the Drop handler won't do anything. - } -} - -impl Drop for DownloadStatusReporter { - fn drop(&mut self) { - if let (Some(download_id), Some(observer)) = (self.download_id, self.observer.as_deref()) { - // We were dropped before a call to `download_failed` or `download_completed`. - // This was most likely because the future we were stored in was dropped. - // Tell the observer. - observer.on_download_canceled(download_id); - } - } -} diff --git a/wholesym/src/config.rs b/wholesym/src/config.rs index 8c7cb898..7e9e9f9f 100644 --- a/wholesym/src/config.rs +++ b/wholesym/src/config.rs @@ -8,7 +8,6 @@ use symsrv::{parse_nt_symbol_path, NtSymbolPathEntry}; /// Allows specifying various sources of symbol files. #[derive(Debug, Clone, Default)] pub struct SymbolManagerConfig { - pub(crate) verbose: bool, pub(crate) redirect_paths: HashMap, pub(crate) respect_nt_symbol_path: bool, pub(crate) default_nt_symbol_path: Option, @@ -30,12 +29,6 @@ impl SymbolManagerConfig { Self::default() } - /// Turns logging on or off. - pub fn verbose(mut self, verbose: bool) -> Self { - self.verbose = verbose; - self - } - /// For use in tests. Add a path which, when opened, opens a file at a different path instead. /// /// This can be used to test debug files which refer to other files on the file system with diff --git a/wholesym/src/debuginfod.rs b/wholesym/src/debuginfod.rs index 65ca6a32..8fd40843 100644 --- a/wholesym/src/debuginfod.rs +++ b/wholesym/src/debuginfod.rs @@ -1,18 +1,21 @@ use std::path::{Path, PathBuf}; +use std::sync::Arc; -pub struct DebuginfodSymbolCache(DebuginfodSymbolCacheInner); +use crate::downloader::{Downloader, DownloaderObserver}; -enum DebuginfodSymbolCacheInner { +pub struct DebuginfodDownloader(DebuginfodDownloaderInner); + +enum DebuginfodDownloaderInner { #[allow(unused)] - Official(OfficialDebuginfodSymbolCache), - Manual(ManualDebuginfodSymbolCache), + Official(OfficialDebuginfodDownloader), + Manual(ManualDebuginfodDownloader), } -impl DebuginfodSymbolCache { +impl DebuginfodDownloader { pub fn new( debuginfod_cache_dir_if_not_installed: Option, mut servers_and_caches: Vec<(String, PathBuf)>, - verbose: bool, + downloader: Option>, ) -> Self { let is_debuginfod_installed = false; if is_debuginfod_installed { @@ -29,10 +32,12 @@ impl DebuginfodSymbolCache { let extra_servers = std::mem::replace(&mut servers_and_caches, servers_from_env); servers_and_caches.extend(extra_servers); } - Self(DebuginfodSymbolCacheInner::Manual( - ManualDebuginfodSymbolCache { + + Self(DebuginfodDownloaderInner::Manual( + ManualDebuginfodDownloader { servers_and_caches, - verbose, + observer: None, + downloader: downloader.unwrap_or_default(), }, )) } @@ -41,10 +46,10 @@ impl DebuginfodSymbolCache { #[allow(unused)] pub async fn get_file_only_cached(&self, buildid: &str, file_type: &str) -> Option { match &self.0 { - DebuginfodSymbolCacheInner::Official(official) => { + DebuginfodDownloaderInner::Official(official) => { official.get_file_only_cached(buildid, file_type).await } - DebuginfodSymbolCacheInner::Manual(manual) => { + DebuginfodDownloaderInner::Manual(manual) => { manual.get_file_only_cached(buildid, file_type).await } } @@ -52,18 +57,33 @@ impl DebuginfodSymbolCache { pub async fn get_file(&self, buildid: &str, file_type: &str) -> Option { match &self.0 { - DebuginfodSymbolCacheInner::Official(official) => { + DebuginfodDownloaderInner::Official(official) => { official.get_file(buildid, file_type).await } - DebuginfodSymbolCacheInner::Manual(manual) => manual.get_file(buildid, file_type).await, + DebuginfodDownloaderInner::Manual(manual) => manual.get_file(buildid, file_type).await, + } + } + + /// Set the observer for this downloader. + /// + /// The observer can be used for logging, displaying progress bars, informing + /// automatic expiration of cached files, and so on. + /// + /// See the [`DownloaderObserver`] trait for more information. + pub fn set_observer(&mut self, observer: Option>) { + match &mut self.0 { + DebuginfodDownloaderInner::Official(official) => official.set_observer(observer), + DebuginfodDownloaderInner::Manual(manual) => manual.set_observer(observer), } } } /// Uses debuginfod-find on the shell maybe, not sure -struct OfficialDebuginfodSymbolCache; +struct OfficialDebuginfodDownloader; + +impl OfficialDebuginfodDownloader { + pub fn set_observer(&mut self, _observer: Option>) {} -impl OfficialDebuginfodSymbolCache { pub async fn get_file_only_cached(&self, _buildid: &str, _file_type: &str) -> Option { None // TODO } @@ -73,19 +93,38 @@ impl OfficialDebuginfodSymbolCache { } } -/// Full reimplementation of a `debuginfod` client, used on non-Linux platforms or on Linux if debuginfod is not installed. +/// A `debuginfod` client, used on non-Linux platforms or on Linux if debuginfod is not installed. /// /// Does not use the official debuginfod's cache directory because the cache directory structure is not a stable API. -struct ManualDebuginfodSymbolCache { +struct ManualDebuginfodDownloader { servers_and_caches: Vec<(String, PathBuf)>, - verbose: bool, + observer: Option>, + downloader: Arc, } -impl ManualDebuginfodSymbolCache { +impl ManualDebuginfodDownloader { + pub fn set_observer(&mut self, observer: Option>) { + self.observer = observer; + } + + /// Return whether a file is found at `path`, and notify the observer if not. + async fn check_file_exists(&self, path: &Path) -> bool { + let file_exists = matches!(tokio::fs::metadata(path).await, Ok(meta) if meta.is_file()); + if !file_exists { + if let Some(observer) = self.observer.as_deref() { + observer.on_file_missed(path); + } + } + file_exists + } + pub async fn get_file_only_cached(&self, buildid: &str, file_type: &str) -> Option { for (_server_base_url, cache_dir) in &self.servers_and_caches { let cached_file_path = cache_dir.join(buildid).join(file_type); - if cached_file_path.exists() { + if self.check_file_exists(&cached_file_path).await { + if let Some(observer) = self.observer.as_deref() { + observer.on_file_accessed(&cached_file_path); + } return Some(cached_file_path); } } @@ -98,7 +137,7 @@ impl ManualDebuginfodSymbolCache { } for (server_base_url, cache_dir) in &self.servers_and_caches { - if let Ok(file) = self + if let Some(file) = self .get_file_from_server(buildid, file_type, server_base_url, cache_dir) .await { @@ -114,28 +153,18 @@ impl ManualDebuginfodSymbolCache { file_type: &str, server_base_url: &str, cache_dir: &Path, - ) -> Result> { + ) -> Option { + let dest_path = cache_dir.join(buildid).join(file_type); let server_base_url = server_base_url.trim_end_matches('/'); let url = format!("{server_base_url}/buildid/{buildid}/{file_type}"); - if self.verbose { - eprintln!("Downloading {url}..."); - } - let sym_file_response = reqwest::get(&url).await?.error_for_status()?; - let mut stream = sym_file_response.bytes_stream(); - let dest_path = cache_dir.join(buildid).join(file_type); - if let Some(dir) = dest_path.parent() { - tokio::fs::create_dir_all(dir).await?; - } - if self.verbose { - eprintln!("Saving bytes to {dest_path:?}."); - } - let file = tokio::fs::File::create(&dest_path).await?; - let mut writer = tokio::io::BufWriter::new(file); - use futures_util::StreamExt; - while let Some(item) = stream.next().await { - tokio::io::copy(&mut item?.as_ref(), &mut writer).await?; - } - drop(writer); - Ok(dest_path) + + let download = self + .downloader + .initiate_download(&url, self.observer.clone()) + .await + .ok()?; + download.download_to_file(&dest_path, None).await.ok()?; + + Some(dest_path) } } diff --git a/wholesym/src/download.rs b/wholesym/src/download.rs index 9cdb6525..37c903ae 100644 --- a/wholesym/src/download.rs +++ b/wholesym/src/download.rs @@ -1,7 +1,11 @@ +use std::pin::Pin; +use std::sync::Mutex; +use std::task::Poll; + use async_compression::futures::bufread::GzipDecoder; -use futures_util::{io::BufReader, AsyncRead, TryStreamExt}; +use futures_util::io::BufReader; +use futures_util::{AsyncRead, TryStreamExt}; use reqwest::header::{AsHeaderName, HeaderMap, CONTENT_ENCODING, CONTENT_LENGTH}; -use std::{pin::Pin, sync::Mutex, task::Poll}; fn get_header(headers: &HeaderMap, name: K) -> Option { Some(headers.get(name)?.to_str().ok()?.to_ascii_lowercase()) diff --git a/wholesym/src/download_error.rs b/wholesym/src/download_error.rs new file mode 100644 index 00000000..dceed135 --- /dev/null +++ b/wholesym/src/download_error.rs @@ -0,0 +1,43 @@ +/// The error type used in the observer notification [`SymbolManagerObserver::on_download_failed`]. +#[derive(thiserror::Error, Debug)] +pub enum DownloadError { + /// Creating the reqwest Client failed. + #[error("Creating the reqwest client failed: {0}")] + ClientCreationFailed(String), + + /// Opening the request failed. + #[error("Opening the request failed: {0}")] + OpenFailed(Box), + + /// The download timed out. + #[error("The download timed out")] + Timeout, + + /// The server returned a non-success status code. + #[error("The server returned status code {0}")] + StatusError(u16), + + /// The destination directory could not be created. + #[error("The destination directory could not be created")] + CouldNotCreateDestinationDirectory, + + /// The response used an unexpected Content-Encoding. + #[error("The response used an unexpected Content-Encoding: {0}")] + UnexpectedContentEncoding(String), + + /// An error occurred when reading the download stream. + #[error("Error when reading the download stream: {0}")] + StreamRead(std::io::Error), + + /// An I/O error occurred while writing the downloaded file. + #[error("Error while writing the downloaded file to disk: {0}")] + DiskWrite(std::io::Error), + + /// Redirect-related error. + #[error("Redirect-related error")] + Redirect(Box), + + /// Other error. + #[error("Other error: {0}")] + Other(Box), +} diff --git a/wholesym/src/downloader.rs b/wholesym/src/downloader.rs new file mode 100644 index 00000000..2f436a6c --- /dev/null +++ b/wholesym/src/downloader.rs @@ -0,0 +1,457 @@ +use std::path::Path; +use std::pin::Pin; +use std::sync::atomic::AtomicU64; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use futures_util::AsyncRead; +use futures_util::AsyncReadExt as _; +use tokio::io::AsyncWriteExt; + +use crate::download::response_to_uncompressed_stream_with_progress; +use crate::file_creation::{create_file_cleanly, CleanFileCreationError}; +use crate::DownloadError; + +/// A trait for observing the behavior of a `BreakpadSymbolDownloader` or `DebuginfodDownloader`. +/// This can be used for logging, displaying progress bars, expiring cached files, etc. +pub trait DownloaderObserver: Send + Sync + 'static { + /// Called when a new download is about to start, before the connection is established. + /// + /// The download ID is unique for each download. + /// + /// For each download ID, we guarantee that exactly one of the following methods + /// will be called at the end of the download: `on_download_completed`, + /// `on_download_failed`, or `on_download_canceled`. + fn on_new_download_before_connect(&self, download_id: u64, url: &str); + + /// Called once the connection has been established and HTTP headers + /// with a success status have arrived. + fn on_download_started(&self, download_id: u64); + + /// Called frequently during the download, whenever a new chunk has been read. + /// + /// If the HTTP response is gzip-compressed, the number of bytes can refer to + /// either the compressed or the uncompressed bytes - but it'll be consistent: + /// Either both `bytes_so_far` and `total_bytes` refer to the compressed sizes, + /// or both refer to the uncompressed sizes. + /// + /// If `total_bytes` is `None`, the total size is unknown. + fn on_download_progress(&self, download_id: u64, bytes_so_far: u64, total_bytes: Option); + + /// Called when the download has completed successfully. + /// + /// Mutually exclusive with `on_download_failed` and `on_download_canceled` for a + /// given download ID. + fn on_download_completed( + &self, + download_id: u64, + uncompressed_size_in_bytes: u64, + time_until_headers: Duration, + time_until_completed: Duration, + ); + + /// Called when the download has failed. + /// + /// This is quite common; the most common reason is [`DownloadError::StatusError`] + /// with [`StatusCode::NOT_FOUND`](http::StatusCode::NOT_FOUND), for files which + /// are not available on the server. + /// + /// Mutually exclusive with `on_download_completed` and `on_download_canceled` for a + /// given download ID. + fn on_download_failed(&self, download_id: u64, reason: DownloadError); + + /// Called when the download has been canceled. + /// + /// This does not indicate an error. We commonly attempt to download a file from + /// multiple sources simultaneously, and cancel other downloads once one has succeeded. + /// + /// This function is also called if the user cancels the download by dropping the future + /// returned from [`BreakpadSymbolDownloader::get_file`]. + /// + /// Mutually exclusive with `on_download_completed` and `on_download_failed` for a + /// given download ID. + fn on_download_canceled(&self, download_id: u64); + + /// Called when a file has been created, for example because it was downloaded from + /// a server, copied from a different cache directory, or extracted from a compressed + /// file. + fn on_file_created(&self, path: &Path, size_in_bytes: u64); + + /// Called when a file from the cache has been used to service a [`BreakpadSymbolDownloader::get_file`] call. + /// + /// This is only called for pre-existing files and not for newly-created files - newly-created + /// files only trigger a call to `on_file_created`. + /// + /// Useful to guide expiration decisions. + fn on_file_accessed(&self, path: &Path); + + /// Called when we were looking for a file in the cache, and it wasn't there. Used for + /// debug logging. + /// + /// Also called if checking for file existence fails for any other reason. + fn on_file_missed(&self, path: &Path); +} + +static NEXT_DOWNLOAD_ID: AtomicU64 = AtomicU64::new(0); + +/// A helper struct with a drop handler. This lets us detect when a download +/// is cancelled by dropping the future. +pub struct DownloadStatusReporter { + /// Set to `None` when `download_failed()` or `download_completed()` is called. + download_id: Option, + observer: Option>, + ts_before_connect: Instant, +} + +impl DownloadStatusReporter { + pub fn new(observer: Option>, url: &str) -> Self { + let download_id = NEXT_DOWNLOAD_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + + if let Some(observer) = &observer { + observer.on_new_download_before_connect(download_id, url); + } + + Self { + download_id: Some(download_id), + observer, + ts_before_connect: Instant::now(), + } + } + + pub fn download_id(&self) -> u64 { + self.download_id.unwrap() + } + + pub fn download_failed(mut self, e: DownloadError) { + if let (Some(download_id), Some(observer)) = (self.download_id, self.observer.as_deref()) { + observer.on_download_failed(download_id, e); + } + self.download_id = None; + // Drop self. Now the Drop handler won't do anything. + } + + pub fn download_completed( + mut self, + uncompressed_size_in_bytes: u64, + ts_after_headers: Instant, + ts_after_completed: Instant, + ) { + if let (Some(download_id), Some(observer)) = (self.download_id, self.observer.as_deref()) { + let time_until_headers = ts_after_headers.duration_since(self.ts_before_connect); + let time_until_completed = ts_after_completed.duration_since(self.ts_before_connect); + observer.on_download_completed( + download_id, + uncompressed_size_in_bytes, + time_until_headers, + time_until_completed, + ); + } + self.download_id = None; + // Drop self. Now the Drop handler won't do anything. + } +} + +impl Drop for DownloadStatusReporter { + fn drop(&mut self) { + if let (Some(download_id), Some(observer)) = (self.download_id, self.observer.as_deref()) { + // We were dropped before a call to `download_failed` or `download_completed`. + // This was most likely because the future we were stored in was dropped. + // Tell the observer. + observer.on_download_canceled(download_id); + } + } +} + +pub struct Downloader { + reqwest_client: Result, +} + +impl Default for Downloader { + fn default() -> Self { + Downloader::new() + } +} + +impl Downloader { + pub fn new() -> Self { + let builder = reqwest::Client::builder(); + + // Turn off HTTP 2, in order to work around https://github.com/seanmonstar/reqwest/issues/1761 . + let builder = builder.http1_only(); + + // Turn off automatic decompression because it doesn't allow us to compute + // download progress percentages: we'd only know the decompressed current + // size and the compressed total size. + // Instead, we do the streaming decompression manually, see download.rs. + let builder = builder.no_gzip().no_brotli().no_deflate(); + + // Create the client. + // TODO: Add timeouts, user agent, maybe other settings + let reqwest_client = builder.build(); + + Self { reqwest_client } + } + + pub async fn initiate_download( + &self, + url: &str, + observer: Option>, + ) -> Result { + let reporter = DownloadStatusReporter::new(observer.clone(), url); + + let reqwest_client = match self.reqwest_client.as_ref() { + Ok(client) => client, + Err(e) => { + reporter.download_failed(DownloadError::ClientCreationFailed(e.to_string())); + return Err(DownloadError::ClientCreationFailed(e.to_string())); + } + }; + + let request_builder = reqwest_client.get(url); + + // Manually specify the Accept-Encoding header. + // This would happen automatically if we hadn't turned off automatic + // decompression for this reqwest client. + let request_builder = request_builder.header("Accept-Encoding", "gzip"); + + // Send the request and wait for the headers. + let response_result = request_builder.send().await; + + // Check the HTTP status code. + let response_result = response_result.and_then(|response| response.error_for_status()); + + let response = match response_result { + Ok(response) => response, + Err(e) if e.is_status() => { + let status = e.status().unwrap().as_u16(); + reporter.download_failed(DownloadError::StatusError(status)); + return Err(DownloadError::StatusError(status)); + } + Err(e) if e.is_request() => { + let s = e.to_string(); + reporter.download_failed(DownloadError::OpenFailed(e.into())); + return Err(DownloadError::OpenFailed(s.into())); + } + Err(e) if e.is_redirect() => { + let s = e.to_string(); + reporter.download_failed(DownloadError::Redirect(e.into())); + return Err(DownloadError::Redirect(s.into())); + } + Err(e) if e.is_timeout() => { + reporter.download_failed(DownloadError::Timeout); + return Err(DownloadError::Timeout); + } + Err(e) => { + let s = e.to_string(); + reporter.download_failed(DownloadError::Other(e.into())); + return Err(DownloadError::Other(s.into())); + } + }; + + let ts_after_status = Instant::now(); + + let observer2 = observer.clone(); + let download_id = reporter.download_id(); + + let stream = match response_to_uncompressed_stream_with_progress( + response, + move |bytes_so_far, total_bytes| { + if let Some(observer) = observer2.as_deref() { + observer.on_download_progress(download_id, bytes_so_far, total_bytes) + } + }, + ) { + Ok(stream) => stream, + Err(crate::download::Error::UnexpectedContentEncoding(encoding)) => { + reporter + .download_failed(DownloadError::UnexpectedContentEncoding(encoding.clone())); + return Err(DownloadError::UnexpectedContentEncoding(encoding)); + } + }; + Ok(PendingDownload { + reporter, + stream, + observer, + ts_after_status, + }) + } +} + +pub struct PendingDownload { + reporter: DownloadStatusReporter, + stream: Pin>, + observer: Option>, + ts_after_status: Instant, +} + +pub enum FileDownloadOutcome { + DidCreateNewFile, + FoundExistingFile, +} + +impl PendingDownload { + #[allow(clippy::type_complexity)] + pub async fn download_to_file( + self, + dest_path: &Path, + mut chunk_consumer: Option<&mut (dyn FnMut(&[u8]) + Send)>, + ) -> Result { + let PendingDownload { + reporter, + mut stream, + observer, + ts_after_status, + } = self; + let download_id = reporter.download_id(); + if let Some(observer) = observer.as_deref() { + observer.on_download_started(download_id); + } + + if let Some(dir) = dest_path.parent() { + match tokio::fs::create_dir_all(dir).await { + Ok(_) => {} + Err(_e) => { + reporter.download_failed(DownloadError::CouldNotCreateDestinationDirectory); + return Err(DownloadError::CouldNotCreateDestinationDirectory); + } + } + } + + let download_result: Result< + (FileDownloadOutcome, u64), + CleanFileCreationError, + > = create_file_cleanly( + dest_path, + |dest_file: std::fs::File| async move { + let mut dest_file = tokio::fs::File::from_std(dest_file); + let mut buf = vec![0u8; 4096]; + let mut uncompressed_size_in_bytes = 0; + loop { + let count = stream + .read(&mut buf) + .await + .map_err(DownloadError::StreamRead)?; + if count == 0 { + break; + } + uncompressed_size_in_bytes += count as u64; + dest_file + .write_all(&buf[..count]) + .await + .map_err(DownloadError::DiskWrite)?; + if let Some(chunk_consumer) = &mut chunk_consumer { + chunk_consumer(&buf[..count]); + } + } + dest_file.flush().await.map_err(DownloadError::DiskWrite)?; + Ok(( + FileDownloadOutcome::DidCreateNewFile, + uncompressed_size_in_bytes, + )) + }, + || async { + let size = std::fs::metadata(dest_path) + .map_err(DownloadError::DiskWrite)? + .len(); + Ok((FileDownloadOutcome::FoundExistingFile, size)) + }, + ) + .await; + + let (outcome, uncompressed_size_in_bytes) = match download_result { + Ok(outcome_and_size) => outcome_and_size, + Err(CleanFileCreationError::CallbackIndicatedError(e)) => { + let cloned_error = match &e { + DownloadError::StreamRead(e) => { + DownloadError::StreamRead(std::io::Error::new(e.kind(), e.to_string())) + } + DownloadError::DiskWrite(e) => { + DownloadError::DiskWrite(std::io::Error::new(e.kind(), e.to_string())) + } + e => DownloadError::Other(e.to_string().into()), + }; + reporter.download_failed(e); + return Err(cloned_error); + } + Err(e) => { + let s = e.to_string(); + reporter.download_failed(DownloadError::DiskWrite(e.into())); + return Err(DownloadError::DiskWrite(std::io::Error::new( + std::io::ErrorKind::Other, + s, + ))); + } + }; + + let ts_after_download = Instant::now(); + reporter.download_completed( + uncompressed_size_in_bytes, + ts_after_status, + ts_after_download, + ); + + if let Some(observer) = &observer { + observer.on_file_created(dest_path, uncompressed_size_in_bytes); + } + + Ok(outcome) + } + + #[allow(clippy::type_complexity)] + #[allow(dead_code)] + pub async fn download_to_memory( + self, + mut chunk_consumer: Option<&mut (dyn FnMut(&[u8]) + Send)>, + ) -> Result, DownloadError> { + let PendingDownload { + reporter, + mut stream, + observer, + ts_after_status, + } = self; + let download_id = reporter.download_id(); + if let Some(observer) = observer.as_deref() { + observer.on_download_started(download_id); + } + + let mut bytes = Vec::new(); + let bytes_ref = &mut bytes; + + let download_result: Result = async move { + let mut buf = vec![0u8; 4096]; + let mut uncompressed_size_in_bytes = 0; + loop { + let count = stream.read(&mut buf).await?; + if count == 0 { + break; + } + uncompressed_size_in_bytes += count as u64; + bytes_ref.extend_from_slice(&buf[..count]); + if let Some(chunk_consumer) = &mut chunk_consumer { + chunk_consumer(&buf[..count]); + } + } + Ok(uncompressed_size_in_bytes) + } + .await; + + let uncompressed_size_in_bytes = match download_result { + Ok(size) => size, + Err(e) => { + let kind = e.kind(); + let s = e.to_string(); + reporter.download_failed(DownloadError::StreamRead(e)); + return Err(DownloadError::StreamRead(std::io::Error::new(kind, s))); + } + }; + + let ts_after_download = Instant::now(); + reporter.download_completed( + uncompressed_size_in_bytes, + ts_after_status, + ts_after_download, + ); + + Ok(bytes) + } +} diff --git a/wholesym/src/file_creation.rs b/wholesym/src/file_creation.rs index a9a595f9..9441de04 100644 --- a/wholesym/src/file_creation.rs +++ b/wholesym/src/file_creation.rs @@ -1,7 +1,8 @@ use std::io; use std::path::Path; -use fs4::{fs_std::FileExt, lock_contended_error}; +use fs4::fs_std::FileExt; +use fs4::lock_contended_error; /// The error type for the `create_file_cleanly` function. #[derive(thiserror::Error, Debug)] diff --git a/wholesym/src/helper.rs b/wholesym/src/helper.rs index fc388ad1..5550de4b 100644 --- a/wholesym/src/helper.rs +++ b/wholesym/src/helper.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::fs::{self, File}; use std::path::{Path, PathBuf}; +use std::sync::atomic::AtomicU64; use std::sync::{Arc, Mutex}; use bytes::Bytes; @@ -12,10 +13,12 @@ use samply_symbols::{ use symsrv::{SymsrvDownloader, SymsrvObserver}; use uuid::Uuid; -use crate::breakpad::{BreakpadSymbolDownloader, BreakpadSymbolObserver}; +use crate::breakpad::BreakpadSymbolDownloader; use crate::config::SymbolManagerConfig; -use crate::debuginfod::DebuginfodSymbolCache; +use crate::debuginfod::DebuginfodDownloader; +use crate::downloader::{Downloader, DownloaderObserver}; use crate::vdso::get_vdso_data; +use crate::{DownloadError, SymbolManagerObserver}; /// This is how the symbol file contents are returned. If there's an uncompressed file /// in the store, then we return an Mmap of that uncompressed file. If there is no @@ -240,12 +243,14 @@ impl FileAndPathHelper for FileReadOnlyHelper { } pub struct Helper { + downloader: Arc, symsrv_downloader: Option, breakpad_downloader: BreakpadSymbolDownloader, - debuginfod_symbol_cache: Option, + debuginfod_downloader: Option, known_libs: Mutex, config: SymbolManagerConfig, precog_symbol_data: Mutex>>, + observer: Arc, } #[derive(Debug, Clone, Default)] @@ -258,23 +263,25 @@ struct KnownLibs { impl Helper { pub fn with_config(config: SymbolManagerConfig) -> Self { + let observer = Arc::new(HelperDownloaderObserver::new()); + let downloader = Arc::new(Downloader::new()); let symsrv_downloader = match config.effective_nt_symbol_path() { Some(nt_symbol_path) => { let mut downloader = SymsrvDownloader::new(nt_symbol_path); downloader.set_default_downstream_store(symsrv::get_home_sym_dir()); - if config.verbose { - downloader.set_observer(Some(Arc::new(VerboseDownloaderObserver::new()))); - } + downloader.set_observer(Some(observer.clone())); Some(downloader) } None => None, }; - let debuginfod_symbol_cache = if config.use_debuginfod { - Some(DebuginfodSymbolCache::new( + let debuginfod_downloader = if config.use_debuginfod { + let mut downloader = DebuginfodDownloader::new( config.debuginfod_cache_dir_if_not_installed.clone(), config.debuginfod_servers.clone(), - config.verbose, - )) + Some(downloader.clone()), + ); + downloader.set_observer(Some(observer.clone())); + Some(downloader) } else { None }; @@ -282,20 +289,25 @@ impl Helper { config.breakpad_directories_readonly.clone(), config.breakpad_servers.clone(), config.breakpad_symindex_cache_dir.clone(), + Some(downloader.clone()), ); - if config.verbose { - breakpad_downloader.set_observer(Some(Arc::new(VerboseDownloaderObserver::new()))); - } + breakpad_downloader.set_observer(Some(observer.clone())); Self { + downloader, symsrv_downloader, breakpad_downloader, - debuginfod_symbol_cache, + debuginfod_downloader, known_libs: Mutex::new(Default::default()), config, precog_symbol_data: Mutex::new(Default::default()), + observer, } } + pub fn set_observer(&self, observer: Option>) { + self.observer.set_observer(observer); + } + pub fn add_known_lib(&self, lib_info: LibraryInfo) { let mut known_libs = self.known_libs.lock().unwrap(); let lib_info = Arc::new(lib_info); @@ -334,156 +346,99 @@ impl Helper { precog_symbol_data.insert(debug_id, symbol_map); } + /// Return whether a file is found at `path`, and notify the observer if not. + async fn check_file_exists(&self, path: &Path) -> bool { + let file_exists = matches!(tokio::fs::metadata(path).await, Ok(meta) if meta.is_file()); + if !file_exists { + self.observer.on_file_missed(path); + } + file_exists + } + async fn load_file_impl( &self, location: WholesymFileLocation, ) -> FileAndPathHelperResult { - match location { + let file_path = match location { WholesymFileLocation::LocalFile(path) => { - if self.config.verbose { - eprintln!("Opening file {:?}", path.to_string_lossy()); - } let path = self.config.redirect_paths.get(&path).unwrap_or(&path); - let file = File::open(path)?; - Ok(WholesymFileContents::Mmap(unsafe { - memmap2::MmapOptions::new().map(&file)? - })) + if !self.check_file_exists(path).await { + return Err(format!("File not found: {path:?}").into()); + } + path.to_owned() } WholesymFileLocation::LocalSymsrvFile(filename, hash) => { - if self.config.verbose { - eprintln!( - "Trying to get file {filename} {hash} from symbol cache (no download)" - ); - } - let file_path = self - .symsrv_downloader + self.symsrv_downloader .as_ref() .unwrap() .get_file_no_download(&filename, &hash) - .await?; - Ok(WholesymFileContents::Mmap(unsafe { - memmap2::MmapOptions::new().map(&File::open(file_path)?)? - })) - } - WholesymFileLocation::LocalBreakpadFile(rel_path) => { - let path = self - .breakpad_downloader - .get_file_no_download(&rel_path) - .await - .ok_or("Not found on breakpad symbol server")?; - if self.config.verbose { - eprintln!("Opening file {:?}", path.to_string_lossy()); - } - let file = File::open(path)?; - Ok(WholesymFileContents::Mmap(unsafe { - memmap2::MmapOptions::new().map(&file)? - })) + .await? } + WholesymFileLocation::LocalBreakpadFile(rel_path) => self + .breakpad_downloader + .get_file_no_download(&rel_path) + .await + .ok_or("Not found on breakpad symbol server")?, WholesymFileLocation::UrlForSourceFile(url) => { - if self.config.verbose { - eprintln!("Trying to get file {url} from a URL"); - } - let bytes = reqwest::get(&url).await?.bytes().await?; - Ok(WholesymFileContents::Bytes(bytes)) + let download = self + .downloader + .initiate_download(&url, Some(self.observer.clone())) + .await?; + let bytes = download.download_to_memory(None).await?; + return Ok(WholesymFileContents::Bytes(bytes.into())); } WholesymFileLocation::SymsrvFile(filename, hash) => { - if self.config.verbose { - eprintln!( - "Trying to get file {filename} {hash} from symbol cache (download allowed)" - ); - } - let file_path = self - .symsrv_downloader + self.symsrv_downloader .as_ref() .unwrap() .get_file(&filename, &hash) - .await?; - if self.config.verbose { - eprintln!("Opening file {:?}", file_path.to_string_lossy()); - } - Ok(WholesymFileContents::Mmap(unsafe { - memmap2::MmapOptions::new().map(&File::open(file_path)?)? - })) - } - WholesymFileLocation::BreakpadSymbolServerFile(path) => { - if self.config.verbose { - eprintln!("Trying to get file {path:?} from breakpad symbol server"); - } - let file_path = self - .breakpad_downloader - .get_file(&path) - .await - .ok_or("Not found on breakpad symbol server")?; - if self.config.verbose { - eprintln!("Opening file {:?}", file_path.to_string_lossy()); - } - Ok(WholesymFileContents::Mmap(unsafe { - memmap2::MmapOptions::new().map(&File::open(file_path)?)? - })) + .await? } + WholesymFileLocation::BreakpadSymbolServerFile(path) => self + .breakpad_downloader + .get_file(&path) + .await + .ok_or("Not found on breakpad symbol server")?, WholesymFileLocation::BreakpadSymindexFile(rel_path) => { let sym_path = self .breakpad_downloader .get_file_no_download(&rel_path) .await .ok_or("Not found in breakpad symbol directories")?; - let file_path = self - .breakpad_downloader + self.breakpad_downloader .ensure_symindex(&sym_path, &rel_path) - .await?; - if self.config.verbose { - eprintln!("Opening file {:?}", file_path.to_string_lossy()); - } - Ok(WholesymFileContents::Mmap(unsafe { - memmap2::MmapOptions::new().map(&File::open(file_path)?)? - })) - } - WholesymFileLocation::DebuginfodDebugFile(build_id) => { - let file_path = self - .debuginfod_symbol_cache - .as_ref() - .unwrap() - .get_file(&build_id.to_string(), "debuginfo") - .await - .ok_or("Debuginfod could not find debuginfo")?; - if self.config.verbose { - eprintln!("Opening file {:?}", file_path.to_string_lossy()); - } - - Ok(WholesymFileContents::Mmap(unsafe { - memmap2::MmapOptions::new().map(&File::open(file_path)?)? - })) - } - WholesymFileLocation::DebuginfodExecutable(build_id) => { - let file_path = self - .debuginfod_symbol_cache - .as_ref() - .unwrap() - .get_file(&build_id.to_string(), "debuginfo") - .await - .ok_or("Debuginfod could not find debuginfo")?; - if self.config.verbose { - eprintln!("Opening file {:?}", file_path.to_string_lossy()); - } - - Ok(WholesymFileContents::Mmap(unsafe { - memmap2::MmapOptions::new().map(&File::open(file_path)?)? - })) + .await? } + WholesymFileLocation::DebuginfodDebugFile(build_id) => self + .debuginfod_downloader + .as_ref() + .unwrap() + .get_file(&build_id.to_string(), "debuginfo") + .await + .ok_or("Debuginfod could not find debuginfo")?, + WholesymFileLocation::DebuginfodExecutable(build_id) => self + .debuginfod_downloader + .as_ref() + .unwrap() + .get_file(&build_id.to_string(), "executable") + .await + .ok_or("Debuginfod could not find executable")?, WholesymFileLocation::VdsoLoadedIntoThisProcess => { - if let Some(vdso) = get_vdso_data() { - // Pretend that the VDSO data came from a file. - // This works more or less by accident; object's parsing is made for - // objects stored on disk, not for objects loaded into memory. - // However, the VDSO in-memory image happens to be similar enough to its - // equivalent on-disk image that this works fine. Most importantly, the - // VDSO's section SVMAs match the section file offsets. - Ok(WholesymFileContents::Bytes(Bytes::copy_from_slice(vdso))) - } else { - Err("No vdso in this process".into()) - } + let vdso = get_vdso_data().ok_or("No vdso in this process")?; + // Pretend that the VDSO data came from a file. + // This works more or less by accident; object's parsing is made for + // objects stored on disk, not for objects loaded into memory. + // However, the VDSO in-memory image happens to be similar enough to its + // equivalent on-disk image that this works fine. Most importantly, the + // VDSO's section SVMAs match the section file offsets. + return Ok(WholesymFileContents::Bytes(Bytes::copy_from_slice(vdso))); } - } + }; + + self.observer.on_file_accessed(&file_path); + Ok(WholesymFileContents::Mmap(unsafe { + memmap2::MmapOptions::new().map(&File::open(file_path)?)? + })) } fn fill_in_library_info_details(&self, info: &mut LibraryInfo) { @@ -672,7 +627,7 @@ impl FileAndPathHelper for Helper { if !might_be_fake_jit_file(&info) { if let (Some(_debuginfod_symbol_cache), Some(CodeId::ElfBuildId(build_id))) = - (self.debuginfod_symbol_cache.as_ref(), &info.code_id) + (self.debuginfod_downloader.as_ref(), &info.code_id) { paths.push(CandidatePathInfo::SingleFile( WholesymFileLocation::DebuginfodDebugFile(build_id.to_owned()), @@ -840,11 +795,14 @@ impl FileAndPathHelper for Helper { } if let (Some(_debuginfod_symbol_cache), Some(CodeId::ElfBuildId(build_id))) = - (self.debuginfod_symbol_cache.as_ref(), &info.code_id) + (self.debuginfod_downloader.as_ref(), &info.code_id) { paths.push(CandidatePathInfo::SingleFile( WholesymFileLocation::DebuginfodExecutable(build_id.to_owned()), )); + paths.push(CandidatePathInfo::SingleFile( + WholesymFileLocation::DebuginfodDebugFile(build_id.to_owned()), + )); } } @@ -896,7 +854,7 @@ impl FileAndPathHelper for Helper { let path = format!("/usr/lib/debug/.build-id/{two_chars}/{rest}.debug"); paths.push(WholesymFileLocation::LocalFile(PathBuf::from(path))); - if self.debuginfod_symbol_cache.is_some() { + if self.debuginfod_downloader.is_some() { paths.push(WholesymFileLocation::DebuginfodDebugFile( sup_file_build_id.to_owned(), )); @@ -968,60 +926,147 @@ fn might_be_fake_jit_file(info: &LibraryInfo) -> bool { matches!(&info.name, Some(name) if (name.starts_with("jitted-") && name.ends_with(".so")) || name.contains("jit_app_cache:")) } -struct VerboseDownloaderObserver { - urls: Mutex>, +struct HelperDownloaderObserver { + inner: Mutex, } -impl VerboseDownloaderObserver { - fn new() -> Self { +struct HelperDownloaderObserverInner { + observer: Option>, + symsrv_download_id_mapping: HashMap, + downloader_download_id_mapping: HashMap, +} + +impl HelperDownloaderObserver { + pub fn new() -> Self { + let inner = HelperDownloaderObserverInner { + observer: None, + symsrv_download_id_mapping: HashMap::new(), + downloader_download_id_mapping: HashMap::new(), + }; Self { - urls: Mutex::new(HashMap::new()), + inner: Mutex::new(inner), + } + } + + pub fn set_observer(&self, observer: Option>) { + let mut inner = self.inner.lock().unwrap(); + inner.observer = observer; + } + + pub fn on_file_accessed(&self, path: &Path) { + let inner = self.inner.lock().unwrap(); + if let Some(observer) = &inner.observer { + observer.on_file_accessed(path); + } + } + + pub fn on_file_missed(&self, path: &Path) { + let inner = self.inner.lock().unwrap(); + if let Some(observer) = &inner.observer { + observer.on_file_missed(path); } } } -impl SymsrvObserver for VerboseDownloaderObserver { - fn on_new_download_before_connect(&self, download_id: u64, url: &str) { - eprintln!("Connecting to {}...", url); - self.urls - .lock() - .unwrap() - .insert(download_id, url.to_owned()); +static NEXT_DOWNLOAD_ID: AtomicU64 = AtomicU64::new(0); + +impl SymsrvObserver for HelperDownloaderObserver { + fn on_new_download_before_connect(&self, symsrv_download_id: u64, url: &str) { + let download_id = NEXT_DOWNLOAD_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + let mut inner = self.inner.lock().unwrap(); + inner + .symsrv_download_id_mapping + .insert(symsrv_download_id, download_id); + if let Some(observer) = &inner.observer { + observer.on_new_download_before_connect(download_id, url); + } } - fn on_download_started(&self, download_id: u64) { - let urls = self.urls.lock().unwrap(); - let url = urls.get(&download_id).unwrap(); - eprintln!("Downloading from {}...", url); + fn on_download_started(&self, symsrv_download_id: u64) { + let inner = self.inner.lock().unwrap(); + if let Some(observer) = &inner.observer { + let download_id = inner.symsrv_download_id_mapping[&symsrv_download_id]; + observer.on_download_started(download_id); + } } fn on_download_progress( &self, - _download_id: u64, - _bytes_so_far: u64, - _total_bytes: Option, + symsrv_download_id: u64, + bytes_so_far: u64, + total_bytes: Option, ) { + let inner = self.inner.lock().unwrap(); + if let Some(observer) = &inner.observer { + let download_id = inner.symsrv_download_id_mapping[&symsrv_download_id]; + observer.on_download_progress(download_id, bytes_so_far, total_bytes); + } } fn on_download_completed( &self, - download_id: u64, - _uncompressed_size_in_bytes: u64, - _time_until_headers: std::time::Duration, - _time_until_completed: std::time::Duration, + symsrv_download_id: u64, + uncompressed_size_in_bytes: u64, + time_until_headers: std::time::Duration, + time_until_completed: std::time::Duration, ) { - let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); - eprintln!("Finished download from {}.", url); + let mut inner = self.inner.lock().unwrap(); + let download_id = inner + .symsrv_download_id_mapping + .remove(&symsrv_download_id) + .unwrap(); + if let Some(observer) = &inner.observer { + observer.on_download_completed( + download_id, + uncompressed_size_in_bytes, + time_until_headers, + time_until_completed, + ); + } } - fn on_download_failed(&self, download_id: u64, reason: symsrv::DownloadError) { - let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); - eprintln!("Failed to download from {url}: {reason}."); + fn on_download_failed(&self, symsrv_download_id: u64, reason: symsrv::DownloadError) { + let mut inner = self.inner.lock().unwrap(); + let download_id = inner + .symsrv_download_id_mapping + .remove(&symsrv_download_id) + .unwrap(); + if let Some(observer) = &inner.observer { + let err = match reason { + symsrv::DownloadError::ClientCreationFailed(e) => { + DownloadError::ClientCreationFailed(e) + } + symsrv::DownloadError::OpenFailed(e) => DownloadError::OpenFailed(e), + symsrv::DownloadError::Timeout => DownloadError::Timeout, + symsrv::DownloadError::StatusError(status_code) => { + DownloadError::StatusError(status_code.as_u16()) + } + symsrv::DownloadError::CouldNotCreateDestinationDirectory => { + DownloadError::CouldNotCreateDestinationDirectory + } + symsrv::DownloadError::UnexpectedContentEncoding(e) => { + DownloadError::UnexpectedContentEncoding(e) + } + symsrv::DownloadError::ErrorDuringDownloading(e) => DownloadError::StreamRead(e), + symsrv::DownloadError::ErrorWhileWritingDownloadedFile(e) => { + DownloadError::DiskWrite(e) + } + symsrv::DownloadError::Redirect(e) => DownloadError::Redirect(e), + symsrv::DownloadError::Other(e) => DownloadError::Other(e), + }; + observer.on_download_failed(download_id, err); + } } - fn on_download_canceled(&self, download_id: u64) { - let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); - eprintln!("Canceled download from {}.", url); + fn on_download_canceled(&self, symsrv_download_id: u64) { + let mut inner = self.inner.lock().unwrap(); + let download_id = inner + .symsrv_download_id_mapping + .remove(&symsrv_download_id) + .unwrap(); + if let Some(observer) = &inner.observer { + observer.on_download_canceled(download_id); + } } fn on_new_cab_extraction(&self, _extraction_id: u64, _dest_path: &Path) {} @@ -1043,67 +1088,148 @@ impl SymsrvObserver for VerboseDownloaderObserver { fn on_cab_extraction_canceled(&self, _extraction_id: u64) {} fn on_file_created(&self, path: &Path, size_in_bytes: u64) { - eprintln!("Stored {size_in_bytes} bytes at {path:?}."); + let inner = self.inner.lock().unwrap(); + if let Some(observer) = &inner.observer { + observer.on_file_created(path, size_in_bytes); + } } + fn on_file_accessed(&self, path: &Path) { - eprintln!("Checking if {path:?} exists... yes"); + self.on_file_accessed(path); } + fn on_file_missed(&self, path: &Path) { - eprintln!("Checking if {path:?} exists... no"); + self.on_file_missed(path); } } -impl BreakpadSymbolObserver for VerboseDownloaderObserver { - fn on_new_download_before_connect(&self, download_id: u64, url: &str) { - eprintln!("Connecting to {}...", url); - self.urls - .lock() - .unwrap() - .insert(download_id, url.to_owned()); +impl DownloaderObserver for HelperDownloaderObserver { + fn on_new_download_before_connect(&self, downloader_download_id: u64, url: &str) { + let download_id = NEXT_DOWNLOAD_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + let mut inner = self.inner.lock().unwrap(); + inner + .downloader_download_id_mapping + .insert(downloader_download_id, download_id); + if let Some(observer) = &inner.observer { + observer.on_new_download_before_connect(download_id, url); + } } - fn on_download_started(&self, download_id: u64) { - let urls = self.urls.lock().unwrap(); - let url = urls.get(&download_id).unwrap(); - eprintln!("Downloading from {}...", url); + fn on_download_started(&self, downloader_download_id: u64) { + let inner = self.inner.lock().unwrap(); + if let Some(observer) = &inner.observer { + let download_id = inner.downloader_download_id_mapping[&downloader_download_id]; + observer.on_download_started(download_id); + } } fn on_download_progress( &self, - _download_id: u64, - _bytes_so_far: u64, - _total_bytes: Option, + downloader_download_id: u64, + bytes_so_far: u64, + total_bytes: Option, ) { + let inner = self.inner.lock().unwrap(); + if let Some(observer) = &inner.observer { + let download_id = inner.downloader_download_id_mapping[&downloader_download_id]; + observer.on_download_progress(download_id, bytes_so_far, total_bytes); + } } fn on_download_completed( &self, - download_id: u64, - _uncompressed_size_in_bytes: u64, - _time_until_headers: std::time::Duration, - _time_until_completed: std::time::Duration, + downloader_download_id: u64, + uncompressed_size_in_bytes: u64, + time_until_headers: std::time::Duration, + time_until_completed: std::time::Duration, ) { - let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); - eprintln!("Finished download from {}.", url); + let mut inner = self.inner.lock().unwrap(); + let download_id = inner + .downloader_download_id_mapping + .remove(&downloader_download_id) + .unwrap(); + if let Some(observer) = &inner.observer { + observer.on_download_completed( + download_id, + uncompressed_size_in_bytes, + time_until_headers, + time_until_completed, + ); + } } - fn on_download_failed(&self, download_id: u64, reason: crate::breakpad::DownloadError) { - let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); - eprintln!("Failed to download from {url}: {reason}."); + fn on_download_failed(&self, downloader_download_id: u64, reason: DownloadError) { + let mut inner = self.inner.lock().unwrap(); + let download_id = inner + .downloader_download_id_mapping + .remove(&downloader_download_id) + .unwrap(); + if let Some(observer) = &inner.observer { + observer.on_download_failed(download_id, reason); + } } - fn on_download_canceled(&self, download_id: u64) { - let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); - eprintln!("Canceled download from {}.", url); + fn on_download_canceled(&self, downloader_download_id: u64) { + let mut inner = self.inner.lock().unwrap(); + let download_id = inner + .downloader_download_id_mapping + .remove(&downloader_download_id) + .unwrap(); + if let Some(observer) = &inner.observer { + observer.on_download_canceled(download_id); + } } fn on_file_created(&self, path: &Path, size_in_bytes: u64) { - eprintln!("Stored {size_in_bytes} bytes at {path:?}."); + let inner = self.inner.lock().unwrap(); + if let Some(observer) = &inner.observer { + observer.on_file_created(path, size_in_bytes); + } } + fn on_file_accessed(&self, path: &Path) { - eprintln!("Checking if {path:?} exists... yes"); + self.on_file_accessed(path); } + fn on_file_missed(&self, path: &Path) { - eprintln!("Checking if {path:?} exists... no"); + self.on_file_missed(path); } } + +// Thoughts on logging +// +// The purpose of any logging in this file is to make it easier to diagnose missing symbols. +// However, it's super hard to know which pieces of information to log. Symbol lookup is +// basically a long sequence of "try lots of things until we find something". Many individual +// steps in this sequence are expected to fail in the normal case. +// +// When deciding whether something is worth logging, it's best to have a list of scenarios. +// So here's a list of scenarios. "I expected to see symbols, but I didn't see symbols." +// +// 1. No debug information for local rust binary: I have a release build but I forgot to +// specify debug = "true" in my Cargo.toml. +// 2. No macOS system library symbols on new macOS version due to broken dyld cache parsing: +// I am using a new macOS version and wholesym's parsing of the dyld shared cache hasn't +// been updated for the new format. +// 3. Running out of disk space for symbol files from server: I am getting symbols from a +// server (symsrv, breakpad, debuginfod), I have found the correct symbol file, but the +// download failed because my disk filled up. +// 4. Messed up environment variable syntax in Windows terminal: I wanted to get pdb symbols +// from a symbol server, but didn't set my _NT_SYMBOL_PATH environment variable correctly +// (or forgot to set it altogether) and I'm not getting Windows system library symbols or +// Firefox / Chrome symbols. +// 5. Symbols missing on server: I am profiling a build for which I expected symbols to be +// available on a symbol server but they weren't there. For example a Firefox try build, or +// a Windows driver. +// 6. Local files have changed after profiling, e.g. build ID no longer matches. +// 7. Invalid characters in library names, causing downloads to fail because the file paths +// where the downloaded files should be stored aren't valid. +// +// More generally, I want to know: +// - Did it attempt to use the local files that I think it needs to use? +// - Did it contact the symbol server I wanted it to contact? +// - Did the download succeed? If not, was it the server's fault or my machine's fault (full disk)? +// +// I think it's ok if the logging here doesn't answer all those questions. Instead, the +// questions can be answered by information in the response JSON... or I guess by something +// that's stored on the SymbolMap. diff --git a/wholesym/src/lib.rs b/wholesym/src/lib.rs index 3c957c14..bdc3887c 100644 --- a/wholesym/src/lib.rs +++ b/wholesym/src/lib.rs @@ -133,15 +133,20 @@ mod breakpad; mod config; mod debuginfod; mod download; +mod download_error; +mod downloader; mod file_creation; mod helper; mod moria_mac; #[cfg(target_os = "macos")] mod moria_mac_spotlight; mod symbol_manager; +mod symbol_manager_observer; mod vdso; +mod verbose_symbol_manager_observer; pub use config::SymbolManagerConfig; +pub use download_error::DownloadError; pub use samply_symbols; pub use samply_symbols::{ AddressInfo, CodeId, ElfBuildId, Error, ExternalFileAddressInFileRef, ExternalFileAddressRef, @@ -150,3 +155,5 @@ pub use samply_symbols::{ SyncAddressInfo, }; pub use symbol_manager::{SymbolFileOrigin, SymbolManager, SymbolMap}; +pub use symbol_manager_observer::SymbolManagerObserver; +pub use verbose_symbol_manager_observer::VerboseSymbolManagerObserver; diff --git a/wholesym/src/symbol_manager.rs b/wholesym/src/symbol_manager.rs index 328738e9..e6a1c496 100644 --- a/wholesym/src/symbol_manager.rs +++ b/wholesym/src/symbol_manager.rs @@ -10,6 +10,7 @@ use samply_symbols::{ use crate::config::SymbolManagerConfig; use crate::helper::{FileReadOnlyHelper, Helper, WholesymFileContents, WholesymFileLocation}; +use crate::SymbolManagerObserver; /// Used in [`SymbolManager::load_external_file`] and returned by [`SymbolMap::symbol_file_origin`]. #[derive(Debug, Clone)] @@ -223,6 +224,10 @@ impl SymbolManager { Ok(binary.library_info()) } + pub fn set_observer(&mut self, observer: Option>) { + self.symbol_manager.helper().set_observer(observer); + } + /// Tell the `SymbolManager` about a known library. This allows it to find /// debug files or binaries later based on a subset of the library information. /// diff --git a/wholesym/src/symbol_manager_observer.rs b/wholesym/src/symbol_manager_observer.rs new file mode 100644 index 00000000..8f4127ad --- /dev/null +++ b/wholesym/src/symbol_manager_observer.rs @@ -0,0 +1,84 @@ +use std::path::Path; +use std::time::Duration; + +use crate::download_error::DownloadError; + +/// A trait for observing the behavior of a [`SymbolManager`](crate::SymbolManager). +/// This can be used for logging, displaying progress bars, expiring cached files, etc. +pub trait SymbolManagerObserver: Send + Sync + 'static { + /// Called when a new download is about to start, before the connection is established. + /// + /// The download ID is unique for each download. + /// + /// For each download ID, we guarantee that exactly one of the following methods + /// will be called at the end of the download: `on_download_completed`, + /// `on_download_failed`, or `on_download_canceled`. + fn on_new_download_before_connect(&self, download_id: u64, url: &str); + + /// Called once the connection has been established and HTTP headers + /// with a success status have arrived. + fn on_download_started(&self, download_id: u64); + + /// Called frequently during the download, whenever a new chunk has been read. + /// + /// If the HTTP response is gzip-compressed, the number of bytes can refer to + /// either the compressed or the uncompressed bytes - but it'll be consistent: + /// Either both `bytes_so_far` and `total_bytes` refer to the compressed sizes, + /// or both refer to the uncompressed sizes. + /// + /// If `total_bytes` is `None`, the total size is unknown. + fn on_download_progress(&self, download_id: u64, bytes_so_far: u64, total_bytes: Option); + + /// Called when the download has completed successfully. + /// + /// Mutually exclusive with `on_download_failed` and `on_download_canceled` for a + /// given download ID. + fn on_download_completed( + &self, + download_id: u64, + uncompressed_size_in_bytes: u64, + time_until_headers: Duration, + time_until_completed: Duration, + ); + + /// Called when the download has failed. + /// + /// This is quite common; the most common reason is [`DownloadError::StatusError`] + /// with [`StatusCode::NOT_FOUND`](http::StatusCode::NOT_FOUND), for files which + /// are not available on the server. + /// + /// Mutually exclusive with `on_download_completed` and `on_download_canceled` for a + /// given download ID. + fn on_download_failed(&self, download_id: u64, reason: DownloadError); + + /// Called when the download has been canceled. + /// + /// This does not indicate an error. We commonly attempt to download a file from + /// multiple sources simultaneously, and cancel other downloads once one has succeeded. + /// + /// This function is also called if the user cancels the download by dropping the future + /// returned from [`BreakpadSymbolDownloader::get_file`]. + /// + /// Mutually exclusive with `on_download_completed` and `on_download_failed` for a + /// given download ID. + fn on_download_canceled(&self, download_id: u64); + + /// Called when a file has been created, for example because it was downloaded from + /// a server, copied from a different cache directory, or extracted from a compressed + /// file. + fn on_file_created(&self, path: &Path, size_in_bytes: u64); + + /// Called when a file from the cache has been used to service a [`BreakpadSymbolDownloader::get_file`] call. + /// + /// This is only called for pre-existing files and not for newly-created files - newly-created + /// files only trigger a call to `on_file_created`. + /// + /// Useful to guide expiration decisions. + fn on_file_accessed(&self, path: &Path); + + /// Called when we were looking for a file in the cache, and it wasn't there. Used for + /// debug logging. + /// + /// Also called if checking for file existence fails for any other reason. + fn on_file_missed(&self, path: &Path); +} diff --git a/wholesym/src/verbose_symbol_manager_observer.rs b/wholesym/src/verbose_symbol_manager_observer.rs new file mode 100644 index 00000000..bd6179c4 --- /dev/null +++ b/wholesym/src/verbose_symbol_manager_observer.rs @@ -0,0 +1,80 @@ +use std::collections::HashMap; +use std::path::Path; +use std::sync::Mutex; + +use crate::{DownloadError, SymbolManagerObserver}; + +pub struct VerboseSymbolManagerObserver { + urls: Mutex>, +} + +impl VerboseSymbolManagerObserver { + pub fn new() -> Self { + Self { + urls: Mutex::new(HashMap::new()), + } + } +} + +impl Default for VerboseSymbolManagerObserver { + fn default() -> Self { + Self::new() + } +} + +impl SymbolManagerObserver for VerboseSymbolManagerObserver { + fn on_new_download_before_connect(&self, download_id: u64, url: &str) { + eprintln!("Connecting to {}...", url); + self.urls + .lock() + .unwrap() + .insert(download_id, url.to_owned()); + } + + fn on_download_started(&self, download_id: u64) { + let urls = self.urls.lock().unwrap(); + let url = urls.get(&download_id).unwrap(); + eprintln!("Downloading from {}...", url); + } + + fn on_download_progress( + &self, + _download_id: u64, + _bytes_so_far: u64, + _total_bytes: Option, + ) { + } + + fn on_download_completed( + &self, + download_id: u64, + _uncompressed_size_in_bytes: u64, + _time_until_headers: std::time::Duration, + _time_until_completed: std::time::Duration, + ) { + let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); + eprintln!("Finished download from {}.", url); + } + + fn on_download_failed(&self, download_id: u64, reason: DownloadError) { + let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); + eprintln!("Failed to download from {url}: {reason}."); + } + + fn on_download_canceled(&self, download_id: u64) { + let url = self.urls.lock().unwrap().remove(&download_id).unwrap(); + eprintln!("Canceled download from {}.", url); + } + + fn on_file_created(&self, path: &Path, size_in_bytes: u64) { + eprintln!("Created new file at {path:?} (size: {size_in_bytes} bytes)."); + } + + fn on_file_accessed(&self, path: &Path) { + eprintln!("Checking if {path:?} exists... yes"); + } + + fn on_file_missed(&self, path: &Path) { + eprintln!("Checking if {path:?} exists... no"); + } +} diff --git a/wholesym/tests/integration_tests/main.rs b/wholesym/tests/integration_tests/main.rs index ffe0f3d3..8bd3f32e 100644 --- a/wholesym/tests/integration_tests/main.rs +++ b/wholesym/tests/integration_tests/main.rs @@ -108,12 +108,11 @@ fn exe() { assert_eq!(info.arch.as_deref(), Some("x86_64")); } -#[test] -fn dwz_symbolication() { +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn dwz_symbolication() { let ls_dir = fixtures_dir().join("other").join("ls-linux"); let ls_bin_path = ls_dir.join("ls"); let config = wholesym::SymbolManagerConfig::default() - .verbose(true) .redirect_path_for_testing( "/usr/lib/debug/.build-id/63/260a3e6e46db57abf718f6a3562c6eedccf269.debug", ls_dir.join("260a3e6e46db57abf718f6a3562c6eedccf269.debug"), @@ -123,10 +122,10 @@ fn dwz_symbolication() { ls_dir.join("coreutils.debug"), ); let symbol_manager = wholesym::SymbolManager::with_config(config); - let symbol_map = futures::executor::block_on( - symbol_manager.load_symbol_map_for_binary_at_path(&ls_bin_path, None), - ) - .unwrap(); + let symbol_map = symbol_manager + .load_symbol_map_for_binary_at_path(&ls_bin_path, None) + .await + .unwrap(); assert_eq!( symbol_map.debug_id(), @@ -214,7 +213,7 @@ mod simple_example { expected_debug_id: DebugId, test_fn: F, ) { - let mut config = wholesym::SymbolManagerConfig::default().verbose(true); + let mut config = wholesym::SymbolManagerConfig::default(); for (s, path) in redirect_paths { config = config.redirect_path_for_testing(s, path); }