From 9c8d665b412dee7a7d5631789505d7d4d8fed1c1 Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Tue, 21 Feb 2023 11:18:41 +0100 Subject: [PATCH 1/9] Fix codestyle inconsistencies --- .github/workflows/build.yml | 2 + build.rs | 2 +- src/binary_parser.rs | 98 ++++--- src/config.rs | 336 ++++++++++++++---------- src/console_viewer.rs | 278 ++++++++++++++------ src/coredump.rs | 237 ++++++++++++----- src/cython.rs | 115 ++++++--- src/dump.rs | 53 +++- src/flamegraph.rs | 44 ++-- src/lib.rs | 14 +- src/main.rs | 180 +++++++------ src/native_stack_trace.rs | 170 +++++++----- src/python_bindings/mod.rs | 344 +++++++++++++++++------- src/python_data_access.rs | 249 +++++++++++++----- src/python_interpreters.rs | 395 ++++++++++++++++++---------- src/python_process_info.rs | 502 +++++++++++++++++++++++------------- src/python_spy.rs | 261 +++++++++++++------ src/python_threading.rs | 62 +++-- src/sampler.rs | 169 ++++++++---- src/speedscope.rs | 147 +++++++---- src/stack_trace.rs | 139 +++++++--- src/timer.rs | 18 +- src/utils.rs | 4 +- src/version.rs | 99 +++++-- tests/integration_test.rs | 104 +++++--- 25 files changed, 2734 insertions(+), 1288 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 767bf08d..bb00b545 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,6 +37,8 @@ jobs: - name: Install Dependencies run: sudo apt install libunwind-dev if: runner.os == 'Linux' + - name: Format + run: cargo fmt --check - name: Build run: cargo build --release --verbose --examples - name: Test diff --git a/build.rs b/build.rs index 3d8e7bdd..cec961ca 100644 --- a/build.rs +++ b/build.rs @@ -8,6 +8,6 @@ fn main() { match env::var("CARGO_CFG_TARGET_OS").unwrap().as_ref() { "windows" => println!("cargo:rustc-cfg=unwind"), "linux" => println!("cargo:rustc-cfg=unwind"), - _ => { } + _ => {} } } diff --git a/src/binary_parser.rs b/src/binary_parser.rs index 4d97a52b..cd3432ea 100644 --- a/src/binary_parser.rs +++ 
b/src/binary_parser.rs @@ -1,4 +1,3 @@ - use std::collections::HashMap; use std::fs::File; use std::path::Path; @@ -15,7 +14,7 @@ pub struct BinaryInfo { pub bss_size: u64, pub offset: u64, pub addr: u64, - pub size: u64 + pub size: u64, } impl BinaryInfo { @@ -42,12 +41,18 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result mach, goblin::mach::Mach::Fat(fat) => { - let arch = fat.iter_arches().find(|arch| - match arch { + let arch = fat + .iter_arches() + .find(|arch| match arch { Ok(arch) => arch.is_64(), - Err(_) => false - } - ).ok_or_else(|| format_err!("Failed to find 64 bit arch in FAT archive in {}", filename.display()))??; + Err(_) => false, + }) + .ok_or_else(|| { + format_err!( + "Failed to find 64 bit arch in FAT archive in {}", + filename.display() + ) + })??; let bytes = &buffer[arch.offset as usize..][..arch.size as usize]; goblin::mach::MachO::parse(bytes, 0)? } @@ -72,24 +77,44 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result { - let bss_header = elf.section_headers + let bss_header = elf + .section_headers .iter() .find(|ref header| header.sh_type == goblin::elf::section_header::SHT_NOBITS) - .ok_or_else(|| format_err!("Failed to find BSS section header in {}", filename.display()))?; - - let program_header = elf.program_headers + .ok_or_else(|| { + format_err!( + "Failed to find BSS section header in {}", + filename.display() + ) + })?; + + let program_header = elf + .program_headers .iter() - .find(|ref header| - header.p_type == goblin::elf::program_header::PT_LOAD && - header.p_flags & goblin::elf::program_header::PF_X != 0) - .ok_or_else(|| format_err!("Failed to find executable PT_LOAD program header in {}", filename.display()))?; + .find(|ref header| { + header.p_type == goblin::elf::program_header::PT_LOAD + && header.p_flags & goblin::elf::program_header::PF_X != 0 + }) + .ok_or_else(|| { + format_err!( + "Failed to find executable PT_LOAD program header in {}", + filename.display() + ) + })?; // 
p_vaddr may be larger than the map address in case when the header has an offset and // the map address is relatively small. In this case we can default to 0. @@ -103,14 +128,16 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result { for export in pe.exports { if let Some(name) = export.name { @@ -123,16 +150,27 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result { - Err(format_err!("Unhandled binary type")) } + _ => Err(format_err!("Unhandled binary type")), } } diff --git a/src/config.rs b/src/config.rs index a9e80118..e8a78d9a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,4 +1,7 @@ -use clap::{ArgEnum, Arg, Command, crate_description, crate_name, crate_version, PossibleValue, value_parser}; +use clap::{ + crate_description, crate_name, crate_version, value_parser, Arg, ArgEnum, Command, + PossibleValue, +}; use remoteprocess::Pid; /// Options on how to collect samples from a python process @@ -63,7 +66,7 @@ pub struct Config { pub enum FileFormat { flamegraph, raw, - speedscope + speedscope, } impl FileFormat { @@ -87,41 +90,55 @@ impl std::str::FromStr for FileFormat { } } - - #[derive(Debug, Clone, Eq, PartialEq)] pub enum LockingStrategy { NonBlocking, #[allow(dead_code)] AlreadyLocked, - Lock + Lock, } #[derive(Debug, Clone, Eq, PartialEq)] pub enum RecordDuration { Unlimited, - Seconds(u64) + Seconds(u64), } #[derive(Debug, Clone, Eq, PartialEq, Copy)] pub enum LineNo { NoLine, FirstLineNo, - LastInstruction + LastInstruction, } impl Default for Config { /// Initializes a new Config object with default parameters #[allow(dead_code)] fn default() -> Config { - Config{pid: None, python_program: None, filename: None, format: None, - command: String::from("top"), - blocking: LockingStrategy::Lock, show_line_numbers: false, sampling_rate: 100, - duration: RecordDuration::Unlimited, native: false, - gil_only: false, include_idle: false, include_thread_ids: false, - hide_progress: false, capture_output: true, dump_json: 
false, dump_locals: 0, subprocesses: false, - full_filenames: false, lineno: LineNo::LastInstruction, - refresh_seconds: 1.0, core_filename: None } + Config { + pid: None, + python_program: None, + filename: None, + format: None, + command: String::from("top"), + blocking: LockingStrategy::Lock, + show_line_numbers: false, + sampling_rate: 100, + duration: RecordDuration::Unlimited, + native: false, + gil_only: false, + include_idle: false, + include_thread_ids: false, + hide_progress: false, + capture_output: true, + dump_json: false, + dump_locals: 0, + subprocesses: false, + full_filenames: false, + lineno: LineNo::LastInstruction, + refresh_seconds: 1.0, + core_filename: None, + } } } @@ -129,24 +146,24 @@ impl Config { /// Uses clap to set config options from commandline arguments pub fn from_commandline() -> Config { let args: Vec = std::env::args().collect(); - Config::from_args(&args).unwrap_or_else( |e| e.exit() ) + Config::from_args(&args).unwrap_or_else(|e| e.exit()) } pub fn from_args(args: &[String]) -> clap::Result { // pid/native/nonblocking/rate/python_program/subprocesses/full_filenames arguments can be // used across various subcommand - define once here let pid = Arg::new("pid") - .short('p') - .long("pid") - .value_name("pid") - .help("PID of a running python program to spy on") - .takes_value(true); + .short('p') + .long("pid") + .value_name("pid") + .help("PID of a running python program to spy on") + .takes_value(true); #[cfg(unwind)] let native = Arg::new("native") - .short('n') - .long("native") - .help("Collect stack traces from native extensions written in Cython, C or C++"); + .short('n') + .long("native") + .help("Collect stack traces from native extensions written in Cython, C or C++"); #[cfg(not(target_os="freebsd"))] let nonblocking = Arg::new("nonblocking") @@ -155,94 +172,107 @@ impl Config { the performance impact of sampling, but may lead to inaccurate results"); let rate = Arg::new("rate") - .short('r') - .long("rate") - 
.value_name("rate") - .help("The number of samples to collect per second") - .default_value("100") - .takes_value(true); + .short('r') + .long("rate") + .value_name("rate") + .help("The number of samples to collect per second") + .default_value("100") + .takes_value(true); let subprocesses = Arg::new("subprocesses") - .short('s') - .long("subprocesses") - .help("Profile subprocesses of the original process"); + .short('s') + .long("subprocesses") + .help("Profile subprocesses of the original process"); - let full_filenames = Arg::new("full_filenames") - .long("full-filenames") - .help("Show full Python filenames, instead of shortening to show only the package part"); + let full_filenames = Arg::new("full_filenames").long("full-filenames").help( + "Show full Python filenames, instead of shortening to show only the package part", + ); let program = Arg::new("python_program") - .help("commandline of a python program to run") - .multiple_values(true); + .help("commandline of a python program to run") + .multiple_values(true); let idle = Arg::new("idle") - .short('i') - .long("idle") - .help("Include stack traces for idle threads"); + .short('i') + .long("idle") + .help("Include stack traces for idle threads"); let gil = Arg::new("gil") - .short('g') - .long("gil") - .help("Only include traces that are holding on to the GIL"); + .short('g') + .long("gil") + .help("Only include traces that are holding on to the GIL"); let top_delay = Arg::new("delay") - .long("delay") - .value_name("seconds") - .help("Delay between 'top' refreshes.") - .default_value("1.0") - .value_parser(clap::value_parser!(f64)) - .takes_value(true); + .long("delay") + .value_name("seconds") + .help("Delay between 'top' refreshes.") + .default_value("1.0") + .value_parser(clap::value_parser!(f64)) + .takes_value(true); let record = Command::new("record") .about("Records stack trace information to a flamegraph, speedscope or raw file") .arg(program.clone()) 
.arg(pid.clone().required_unless_present("python_program")) .arg(full_filenames.clone()) - .arg(Arg::new("output") - .short('o') - .long("output") - .value_name("filename") - .help("Output filename") - .takes_value(true) - .required(false)) - .arg(Arg::new("format") - .short('f') - .long("format") - .value_name("format") - .help("Output file format") - .takes_value(true) - .possible_values(FileFormat::possible_values()) - .ignore_case(true) - .default_value("flamegraph")) - .arg(Arg::new("duration") - .short('d') - .long("duration") - .value_name("duration") - .help("The number of seconds to sample for") - .default_value("unlimited") - .takes_value(true)) + .arg( + Arg::new("output") + .short('o') + .long("output") + .value_name("filename") + .help("Output filename") + .takes_value(true) + .required(false), + ) + .arg( + Arg::new("format") + .short('f') + .long("format") + .value_name("format") + .help("Output file format") + .takes_value(true) + .possible_values(FileFormat::possible_values()) + .ignore_case(true) + .default_value("flamegraph"), + ) + .arg( + Arg::new("duration") + .short('d') + .long("duration") + .value_name("duration") + .help("The number of seconds to sample for") + .default_value("unlimited") + .takes_value(true), + ) .arg(rate.clone()) .arg(subprocesses.clone()) - .arg(Arg::new("function") - .short('F') - .long("function") - .help("Aggregate samples by function's first line number, instead of current line number")) - .arg(Arg::new("nolineno") - .long("nolineno") - .help("Do not show line numbers")) - .arg(Arg::new("threads") - .short('t') - .long("threads") - .help("Show thread ids in the output")) + .arg(Arg::new("function").short('F').long("function").help( + "Aggregate samples by function's first line number, instead of current line number", + )) + .arg( + Arg::new("nolineno") + .long("nolineno") + .help("Do not show line numbers"), + ) + .arg( + Arg::new("threads") + .short('t') + .long("threads") + .help("Show thread ids in the output"), 
+ ) .arg(gil.clone()) .arg(idle.clone()) - .arg(Arg::new("capture") - .long("capture") - .hide(true) - .help("Captures output from child process")) - .arg(Arg::new("hideprogress") - .long("hideprogress") - .hide(true) - .help("Hides progress bar (useful for showing error output on record)")); + .arg( + Arg::new("capture") + .long("capture") + .hide(true) + .help("Captures output from child process"), + ) + .arg( + Arg::new("hideprogress") + .long("hideprogress") + .hide(true) + .help("Hides progress bar (useful for showing error output on record)"), + ); let top = Command::new("top") .about("Displays a top like view of functions consuming CPU") @@ -255,23 +285,25 @@ impl Config { .arg(idle.clone()) .arg(top_delay.clone()); - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let dump_pid = pid.clone().required_unless_present("core"); - #[cfg(not(target_os="linux"))] + #[cfg(not(target_os = "linux"))] let dump_pid = pid.clone().required(true); let dump = Command::new("dump") .about("Dumps stack traces for a target program to stdout") .arg(dump_pid); - #[cfg(target_os="linux")] - let dump = dump.arg(Arg::new("core") - .short('c') - .long("core") - .help("Filename of coredump to display python stack traces from") - .value_name("core") - .takes_value(true)); + #[cfg(target_os = "linux")] + let dump = dump.arg( + Arg::new("core") + .short('c') + .long("core") + .help("Filename of coredump to display python stack traces from") + .value_name("core") + .takes_value(true), + ); let dump = dump.arg(full_filenames.clone()) .arg(Arg::new("locals") @@ -288,9 +320,11 @@ impl Config { let completions = Command::new("completions") .about("Generate shell completions") .hide(true) - .arg(Arg::new("shell") - .value_parser(value_parser!(clap_complete::Shell)) - .help("Shell type")); + .arg( + Arg::new("shell") + .value_parser(value_parser!(clap_complete::Shell)) + .help("Shell type"), + ); // add native unwinding if appropriate #[cfg(unwind)] @@ -301,11 +335,11 @@ impl Config { 
let dump = dump.arg(native.clone()); // Nonblocking isn't an option for freebsd, remove - #[cfg(not(target_os="freebsd"))] + #[cfg(not(target_os = "freebsd"))] let record = record.arg(nonblocking.clone()); - #[cfg(not(target_os="freebsd"))] + #[cfg(not(target_os = "freebsd"))] let top = top.arg(nonblocking.clone()); - #[cfg(not(target_os="freebsd"))] + #[cfg(not(target_os = "freebsd"))] let dump = dump.arg(nonblocking.clone()); let mut app = Command::new(crate_name!()) @@ -331,32 +365,41 @@ impl Config { config.sampling_rate = matches.value_of_t("rate")?; config.duration = match matches.value_of("duration") { Some("unlimited") | None => RecordDuration::Unlimited, - Some(seconds) => RecordDuration::Seconds(seconds.parse().expect("invalid duration")) + Some(seconds) => { + RecordDuration::Seconds(seconds.parse().expect("invalid duration")) + } }; config.format = Some(matches.value_of_t("format")?); config.filename = matches.value_of("output").map(|f| f.to_owned()); config.show_line_numbers = matches.occurrences_of("nolineno") == 0; - config.lineno = if matches.occurrences_of("nolineno") > 0 { LineNo::NoLine } else if matches.occurrences_of("function") > 0 { LineNo::FirstLineNo } else { LineNo::LastInstruction }; + config.lineno = if matches.occurrences_of("nolineno") > 0 { + LineNo::NoLine + } else if matches.occurrences_of("function") > 0 { + LineNo::FirstLineNo + } else { + LineNo::LastInstruction + }; config.include_thread_ids = matches.occurrences_of("threads") > 0; - if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 { + if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 + { eprintln!("--function & --nolinenos can't be used together"); std::process::exit(1); } config.hide_progress = matches.occurrences_of("hideprogress") > 0; - }, + } "top" => { config.sampling_rate = matches.value_of_t("rate")?; config.refresh_seconds = *matches.get_one::("delay").unwrap(); - }, + } "dump" => { config.dump_json = 
matches.occurrences_of("json") > 0; config.dump_locals = matches.occurrences_of("locals"); - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] { - config.core_filename = matches.value_of("core").map(|f| f.to_owned()); + config.core_filename = matches.value_of("core").map(|f| f.to_owned()); } - }, + } "completions" => { let shell = matches.get_one::("shell").unwrap(); let app_name = app.get_name().to_string(); @@ -368,12 +411,12 @@ impl Config { match subcommand { "record" | "top" => { - config.python_program = matches.values_of("python_program").map(|vals| { - vals.map(|v| v.to_owned()).collect() - }); + config.python_program = matches + .values_of("python_program") + .map(|vals| vals.map(|v| v.to_owned()).collect()); config.gil_only = matches.occurrences_of("gil") > 0; config.include_idle = matches.occurrences_of("idle") > 0; - }, + } _ => {} } @@ -381,7 +424,9 @@ impl Config { config.command = subcommand.to_owned(); // options that can be shared between subcommands - config.pid = matches.value_of("pid").map(|p| p.parse().expect("invalid pid")); + config.pid = matches + .value_of("pid") + .map(|p| p.parse().expect("invalid pid")); config.full_filenames = matches.occurrences_of("full_filenames") > 0; if cfg!(unwind) { config.native = matches.occurrences_of("native") > 0; @@ -394,7 +439,7 @@ impl Config { if matches.occurrences_of("nonblocking") > 0 { // disable native profiling if invalidly asked for - if config.native { + if config.native { eprintln!("Can't get native stack traces with the --nonblocking option."); std::process::exit(1); } @@ -406,22 +451,26 @@ impl Config { if config.native && config.subprocesses { // the native extension profiling code relies on dbghelp library, which doesn't // seem to work when connecting to multiple processes. disallow - eprintln!("Can't get native stack traces with the ---subprocesses option on windows."); + eprintln!( + "Can't get native stack traces with the ---subprocesses option on windows." 
+ ); std::process::exit(1); } } - #[cfg(target_os="freebsd")] + #[cfg(target_os = "freebsd")] { - if config.pid.is_some() { - if std::env::var("PYSPY_ALLOW_FREEBSD_ATTACH").is_err() { + if config.pid.is_some() { + if std::env::var("PYSPY_ALLOW_FREEBSD_ATTACH").is_err() { eprintln!("On FreeBSD, running py-spy can cause an exception in the profiled process if the process \ is calling 'socket.connect'."); eprintln!("While this is fixed in recent versions of python, you need to acknowledge the risk here by \ setting an environment variable PYSPY_ALLOW_FREEBSD_ATTACH to run this command."); - eprintln!("\nSee https://github.com/benfred/py-spy/issues/147 for more information"); + eprintln!( + "\nSee https://github.com/benfred/py-spy/issues/147 for more information" + ); std::process::exit(-1); - } + } } } Ok(config) @@ -432,7 +481,7 @@ impl Config { mod tests { use super::*; fn get_config(cmd: &str) -> clap::Result { - #[cfg(target_os="freebsd")] + #[cfg(target_os = "freebsd")] std::env::set_var("PYSPY_ALLOW_FREEBSD_ATTACH", "1"); let args: Vec = cmd.split_whitespace().map(|x| x.to_owned()).collect(); Config::from_args(&args) @@ -452,17 +501,26 @@ mod tests { assert_eq!(config, short_config); // missing the --pid argument should fail - assert_eq!(get_config("py-spy record -o foo").unwrap_err().kind, - clap::ErrorKind::MissingRequiredArgument); + assert_eq!( + get_config("py-spy record -o foo").unwrap_err().kind, + clap::ErrorKind::MissingRequiredArgument + ); // but should work when passed a python program let program_config = get_config("py-spy r -o foo -- python test.py").unwrap(); - assert_eq!(program_config.python_program, Some(vec![String::from("python"), String::from("test.py")])); + assert_eq!( + program_config.python_program, + Some(vec![String::from("python"), String::from("test.py")]) + ); assert_eq!(program_config.pid, None); // passing an invalid file format should fail - assert_eq!(get_config("py-spy r -p 1234 -o foo -f unknown").unwrap_err().kind, - 
clap::ErrorKind::InvalidValue); + assert_eq!( + get_config("py-spy r -p 1234 -o foo -f unknown") + .unwrap_err() + .kind, + clap::ErrorKind::InvalidValue + ); // test out overriding these params by setting flags assert_eq!(config.include_idle, false); @@ -487,8 +545,10 @@ mod tests { assert_eq!(config, short_config); // missing the --pid argument should fail - assert_eq!(get_config("py-spy dump").unwrap_err().kind, - clap::ErrorKind::MissingRequiredArgument); + assert_eq!( + get_config("py-spy dump").unwrap_err().kind, + clap::ErrorKind::MissingRequiredArgument + ); } #[test] @@ -505,7 +565,9 @@ mod tests { #[test] fn test_parse_args() { - assert_eq!(get_config("py-spy dude").unwrap_err().kind, - clap::ErrorKind::UnrecognizedSubcommand); + assert_eq!( + get_config("py-spy dude").unwrap_err().kind, + clap::ErrorKind::UnrecognizedSubcommand + ); } } diff --git a/src/console_viewer.rs b/src/console_viewer.rs index 906d154e..9a9d3f9a 100644 --- a/src/console_viewer.rs +++ b/src/console_viewer.rs @@ -1,16 +1,16 @@ use std; use std::collections::HashMap; -use std::vec::Vec; use std::io; use std::io::{Read, Write}; -use std::sync::{Mutex, Arc, atomic}; +use std::sync::{atomic, Arc, Mutex}; use std::thread; +use std::vec::Vec; use anyhow::Error; -use console::{Term, style}; +use console::{style, Term}; use crate::config::Config; -use crate::stack_trace::{StackTrace, Frame}; +use crate::stack_trace::{Frame, StackTrace}; use crate::version::Version; pub struct ConsoleViewer { @@ -23,14 +23,16 @@ pub struct ConsoleViewer { options: Arc>, stats: Stats, subprocesses: bool, - config: Config + config: Config, } impl ConsoleViewer { - pub fn new(show_linenumbers: bool, - python_command: &str, - version: &Option, - config: &Config) -> io::Result { + pub fn new( + show_linenumbers: bool, + python_command: &str, + version: &Option, + config: &Config, + ) -> io::Result { let sampling_rate = 1.0 / (config.sampling_rate as f64); let running = Arc::new(atomic::AtomicBool::new(true)); let 
options = Arc::new(Mutex::new(Options::new(show_linenumbers))); @@ -55,7 +57,7 @@ impl ConsoleViewer { '2' => options.sort_column = 2, '3' => options.sort_column = 3, '4' => options.sort_column = 4, - _ => {}, + _ => {} } options.reset_style = previous_usage != options.usage; @@ -63,13 +65,17 @@ impl ConsoleViewer { } }); - Ok(ConsoleViewer{console_config: os_impl::ConsoleConfig::new()?, - version: version.clone(), - command: python_command.to_owned(), - running, options, sampling_rate, - subprocesses: config.subprocesses, - stats: Stats::new(), - config: config.clone()}) + Ok(ConsoleViewer { + console_config: os_impl::ConsoleConfig::new()?, + version: version.clone(), + command: python_command.to_owned(), + running, + options, + sampling_rate, + subprocesses: config.subprocesses, + stats: Stats::new(), + config: config.clone(), + }) } pub fn increment(&mut self, traces: &[StackTrace]) -> Result<(), Error> { @@ -101,7 +107,10 @@ impl ConsoleViewer { } update_function_statistics(&mut self.stats.line_counts, trace, |frame| { - let filename = match &frame.short_filename { Some(f) => &f, None => &frame.filename }; + let filename = match &frame.short_filename { + Some(f) => &f, + None => &frame.filename, + }; if frame.line != 0 { format!("{} ({}:{})", frame.name, filename, frame.line) } else { @@ -110,7 +119,10 @@ impl ConsoleViewer { }); update_function_statistics(&mut self.stats.function_counts, trace, |frame| { - let filename = match &frame.short_filename { Some(f) => &f, None => &frame.filename }; + let filename = match &frame.short_filename { + Some(f) => &f, + None => &frame.filename, + }; format!("{} ({})", frame.name, filename) }); } @@ -122,8 +134,13 @@ impl ConsoleViewer { // Get the top aggregate function calls (either by line or by function as ) let mut options = self.options.lock().unwrap(); options.dirty = false; - let counts = if options.show_linenumbers { &self.stats.line_counts } else { &self.stats.function_counts }; - let mut 
counts:Vec<(&FunctionStatistics, &str)> = counts.iter().map(|(x,y)| (y, x.as_ref())).collect(); + let counts = if options.show_linenumbers { + &self.stats.line_counts + } else { + &self.stats.function_counts + }; + let mut counts: Vec<(&FunctionStatistics, &str)> = + counts.iter().map(|(x, y)| (y, x.as_ref())).collect(); // TODO: subsort ? match options.sort_column { @@ -131,7 +148,7 @@ impl ConsoleViewer { 2 => counts.sort_unstable_by(|a, b| b.0.current_total.cmp(&a.0.current_total)), 3 => counts.sort_unstable_by(|a, b| b.0.overall_own.cmp(&a.0.overall_own)), 4 => counts.sort_unstable_by(|a, b| b.0.overall_total.cmp(&a.0.overall_total)), - _ => panic!("unknown sort column. this really shouldn't happen") + _ => panic!("unknown sort column. this really shouldn't happen"), } let term = Term::stdout(); let (height, width) = term.size(); @@ -164,23 +181,33 @@ impl ConsoleViewer { } if self.subprocesses { - out!("Collecting samples from '{}' and subprocesses", style(&self.command).green()); + out!( + "Collecting samples from '{}' and subprocesses", + style(&self.command).green() + ); } else { - out!("Collecting samples from '{}' (python v{})", style(&self.command).green(), self.version.as_ref().unwrap()); + out!( + "Collecting samples from '{}' (python v{})", + style(&self.command).green(), + self.version.as_ref().unwrap() + ); } let error_rate = self.stats.errors as f64 / self.stats.overall_samples as f64; if error_rate >= 0.01 && self.stats.overall_samples > 100 { let error_string = self.stats.last_error.as_ref().unwrap(); - out!("Total Samples {}, Error Rate {:.2}% ({})", - style(self.stats.overall_samples).bold(), - style(error_rate * 100.0).bold().red(), - style(error_string).bold()); + out!( + "Total Samples {}, Error Rate {:.2}% ({})", + style(self.stats.overall_samples).bold(), + style(error_rate * 100.0).bold().red(), + style(error_string).bold() + ); } else { - out!("Total Samples {}", style(self.stats.overall_samples).bold()); + out!("Total Samples {}", 
style(self.stats.overall_samples).bold()); } - out!("GIL: {:.2}%, Active: {:>.2}%, Threads: {}{}", + out!( + "GIL: {:.2}%, Active: {:>.2}%, Threads: {}{}", style(100.0 * self.stats.gil as f64 / self.stats.current_samples as f64).bold(), style(100.0 * self.stats.active as f64 / self.stats.current_samples as f64).bold(), style(self.stats.threads).bold(), @@ -188,7 +215,8 @@ impl ConsoleViewer { format!(", Processes {}", style(self.stats.processes).bold()) } else { "".to_owned() - }); + } + ); out!(); @@ -213,51 +241,95 @@ impl ConsoleViewer { // If we aren't at least 50 characters wide, lets use two lines per entry // Otherwise, truncate the filename so that it doesn't wrap around to the next line - let header_lines = if width > 50 { header_lines } else { header_lines + height as usize / 2 }; - let max_function_width = if width > 50 { width as usize - 35 } else { width as usize }; + let header_lines = if width > 50 { + header_lines + } else { + header_lines + height as usize / 2 + }; + let max_function_width = if width > 50 { + width as usize - 35 + } else { + width as usize + }; - out!("{:>7}{:>8}{:>9}{:>11}{:width$}", percent_own_header, percent_total_header, - time_own_header, time_total_header, function_header, width=max_function_width); + out!( + "{:>7}{:>8}{:>9}{:>11}{:width$}", + percent_own_header, + percent_total_header, + time_own_header, + time_total_header, + function_header, + width = max_function_width + ); let mut written = 0; for (samples, label) in counts.iter().take(height as usize - header_lines) { - out!("{:>6.2}% {:>6.2}% {:>7}s {:>8}s {:.width$}", + out!( + "{:>6.2}% {:>6.2}% {:>7}s {:>8}s {:.width$}", 100.0 * samples.current_own as f64 / (self.stats.current_samples as f64), 100.0 * samples.current_total as f64 / (self.stats.current_samples as f64), display_time(samples.overall_own as f64 * self.sampling_rate), display_time(samples.overall_total as f64 * self.sampling_rate), - label, width=max_function_width - 2); - written += 1; + label, + 
width = max_function_width - 2 + ); + written += 1; } - for _ in written.. height as usize - header_lines { + for _ in written..height as usize - header_lines { out!(); } out!(); if options.usage { - out!("{:width$}", style(" Keyboard Shortcuts ").reverse(), width=width as usize); + out!( + "{:width$}", + style(" Keyboard Shortcuts ").reverse(), + width = width as usize + ); out!(); out!("{:^12}{:<}", style("key").green(), style("action").green()); - out!("{:^12}{:<}", "1", "Sort by %Own (% of time currently spent in the function)"); - out!("{:^12}{:<}", "2", "Sort by %Total (% of time currently in the function and its children)"); - out!("{:^12}{:<}", "3", "Sort by OwnTime (Overall time spent in the function)"); - out!("{:^12}{:<}", "4", "Sort by TotalTime (Overall time spent in the function and its children)"); - out!("{:^12}{:<}", "L,l", "Toggle between aggregating by line number or by function"); + out!( + "{:^12}{:<}", + "1", + "Sort by %Own (% of time currently spent in the function)" + ); + out!( + "{:^12}{:<}", + "2", + "Sort by %Total (% of time currently in the function and its children)" + ); + out!( + "{:^12}{:<}", + "3", + "Sort by OwnTime (Overall time spent in the function)" + ); + out!( + "{:^12}{:<}", + "4", + "Sort by TotalTime (Overall time spent in the function and its children)" + ); + out!( + "{:^12}{:<}", + "L,l", + "Toggle between aggregating by line number or by function" + ); out!("{:^12}{:<}", "R,r", "Reset statistics"); out!("{:^12}{:<}", "X,x", "Exit this help screen"); out!(); //println!("{:^12}{:<}", "Control-C", "Quit py-spy"); } else { - out!("Press {} to quit, or {} for help.", - style("Control-C").bold().reverse(), - style("?").bold().reverse()); + out!( + "Press {} to quit, or {} for help.", + style("Control-C").bold().reverse(), + style("?").bold().reverse() + ); } std::io::stdout().flush()?; Ok(()) } - pub fn increment_error(&mut self, err: &Error) -> Result<(), Error> { + pub fn increment_error(&mut self, err: &Error) -> 
Result<(), Error> { self.maybe_reset(); self.stats.errors += 1; self.stats.last_error = Some(format!("{}", err)); @@ -273,8 +345,10 @@ impl ConsoleViewer { // update faster if we only have a few samples, or if we changed options match self.stats.overall_samples { 10 | 100 | 500 => true, - _ => self.options.lock().unwrap().dirty || - self.stats.elapsed >= self.config.refresh_seconds + _ => { + self.options.lock().unwrap().dirty + || self.stats.elapsed >= self.config.refresh_seconds + } } } @@ -311,11 +385,16 @@ struct FunctionStatistics { current_own: u64, current_total: u64, overall_own: u64, - overall_total: u64 + overall_total: u64, } -fn update_function_statistics(counts: &mut HashMap, trace: &StackTrace, key_func: K) - where K: Fn(&Frame) -> String { +fn update_function_statistics( + counts: &mut HashMap, + trace: &StackTrace, + key_func: K, +) where + K: Fn(&Frame) -> String, +{ // we need to deduplicate (so we don't overcount cumulative stats with recursive function calls) let mut current = HashMap::new(); for (i, frame) in trace.frames.iter().enumerate() { @@ -324,8 +403,12 @@ fn update_function_statistics(counts: &mut HashMap Options { - Options{dirty: false, usage: false, reset: false, sort_column: 3, show_linenumbers, reset_style: false} + Options { + dirty: false, + usage: false, + reset: false, + sort_column: 3, + show_linenumbers, + reset_style: false, + } } } impl Stats { fn new() -> Stats { - Stats{current_samples: 0, overall_samples: 0, elapsed: 0., - errors: 0, late_samples: 0, threads: 0, processes: 0, gil: 0, active: 0, - line_counts: HashMap::new(), function_counts: HashMap::new(), - last_error: None, last_delay: None} + Stats { + current_samples: 0, + overall_samples: 0, + elapsed: 0., + errors: 0, + late_samples: 0, + threads: 0, + processes: 0, + gil: 0, + active: 0, + line_counts: HashMap::new(), + function_counts: HashMap::new(), + last_error: None, + last_delay: None, + } } pub fn reset_current(&mut self) { @@ -421,11 +522,11 @@ for doing 
this: #[cfg(unix)] mod os_impl { use super::*; - use termios::{Termios, TCSANOW, ECHO, ICANON, tcsetattr}; + use termios::{tcsetattr, Termios, ECHO, ICANON, TCSANOW}; pub struct ConsoleConfig { termios: Termios, - stdin: i32 + stdin: i32, } impl ConsoleConfig { @@ -445,7 +546,7 @@ mod os_impl { println!(); } - Ok(ConsoleConfig{termios, stdin}) + Ok(ConsoleConfig { termios, stdin }) } pub fn reset_cursor(&self) -> io::Result<()> { @@ -466,19 +567,21 @@ mod os_impl { #[cfg(windows)] mod os_impl { use super::*; - use winapi::shared::minwindef::{DWORD}; - use winapi::um::winnt::{HANDLE}; - use winapi::um::winbase::{STD_INPUT_HANDLE, STD_OUTPUT_HANDLE}; - use winapi::um::processenv::GetStdHandle; - use winapi::um::handleapi::INVALID_HANDLE_VALUE; + use winapi::shared::minwindef::DWORD; use winapi::um::consoleapi::{GetConsoleMode, SetConsoleMode}; - use winapi::um::wincon::{ENABLE_LINE_INPUT, ENABLE_ECHO_INPUT, CONSOLE_SCREEN_BUFFER_INFO, SetConsoleCursorPosition, - GetConsoleScreenBufferInfo, COORD, FillConsoleOutputAttribute}; + use winapi::um::handleapi::INVALID_HANDLE_VALUE; + use winapi::um::processenv::GetStdHandle; + use winapi::um::winbase::{STD_INPUT_HANDLE, STD_OUTPUT_HANDLE}; + use winapi::um::wincon::{ + FillConsoleOutputAttribute, GetConsoleScreenBufferInfo, SetConsoleCursorPosition, + CONSOLE_SCREEN_BUFFER_INFO, COORD, ENABLE_ECHO_INPUT, ENABLE_LINE_INPUT, + }; + use winapi::um::winnt::HANDLE; pub struct ConsoleConfig { stdin: HANDLE, mode: DWORD, - top_left: COORD + top_left: COORD, } impl ConsoleConfig { @@ -515,9 +618,17 @@ mod os_impl { // Figure out a consistent spot in the terminal buffer to write output to let mut top_left = csbi.dwCursorPosition; top_left.X = 0; - top_left.Y = if top_left.Y > height { top_left.Y - height } else { 0 }; - - Ok(ConsoleConfig{stdin, mode, top_left}) + top_left.Y = if top_left.Y > height { + top_left.Y - height + } else { + 0 + }; + + Ok(ConsoleConfig { + stdin, + mode, + top_left, + }) } } @@ -543,8 +654,17 @@ mod 
os_impl { } let mut written: DWORD = 0; - let console_size = ((1 + csbi.srWindow.Bottom - csbi.srWindow.Top) * (csbi.srWindow.Right - csbi.srWindow.Left)) as DWORD; - if FillConsoleOutputAttribute(stdout, csbi.wAttributes, console_size, self.top_left, &mut written) == 0 { + let console_size = ((1 + csbi.srWindow.Bottom - csbi.srWindow.Top) + * (csbi.srWindow.Right - csbi.srWindow.Left)) + as DWORD; + if FillConsoleOutputAttribute( + stdout, + csbi.wAttributes, + console_size, + self.top_left, + &mut written, + ) == 0 + { return Err(io::Error::last_os_error()); } Ok(()) @@ -554,7 +674,9 @@ mod os_impl { impl Drop for ConsoleConfig { fn drop(&mut self) { - unsafe { SetConsoleMode(self.stdin, self.mode); } + unsafe { + SetConsoleMode(self.stdin, self.mode); + } } } } diff --git a/src/coredump.rs b/src/coredump.rs index cdd2f5c8..273ad5b6 100644 --- a/src/coredump.rs +++ b/src/coredump.rs @@ -1,29 +1,34 @@ use std::collections::HashMap; +use std::ffi::OsStr; use std::fs::File; +use std::io::Read; +use std::os::unix::ffi::OsStrExt; use std::path::Path; use std::path::PathBuf; -use std::ffi::OsStr; -use std::os::unix::ffi::OsStrExt; -use std::io::Read; -use anyhow::{Error, Context, Result}; +use anyhow::{Context, Error, Result}; use console::style; use goblin; -use log::{info}; use libc; +use log::info; use remoteprocess; use remoteprocess::ProcessMemory; -use crate::binary_parser::{BinaryInfo, parse_binary}; +use crate::binary_parser::{parse_binary, BinaryInfo}; +use crate::config::Config; use crate::dump::print_trace; -use crate::python_bindings::{v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{ + v2_7_15, v3_10_0, v3_11_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, +}; use crate::python_data_access::format_variable; use crate::python_interpreters::InterpreterState; -use crate::python_process_info::{is_python_lib, ContainsAddr, PythonProcessInfo, get_python_version, get_interpreter_address, 
get_threadstate_address}; -use crate::stack_trace::{StackTrace, get_stack_traces}; +use crate::python_process_info::{ + get_interpreter_address, get_python_version, get_threadstate_address, is_python_lib, + ContainsAddr, PythonProcessInfo, +}; use crate::python_threading::thread_names_from_interpreter; +use crate::stack_trace::{get_stack_traces, StackTrace}; use crate::version::Version; -use crate::config::Config; #[derive(Debug, Clone)] pub struct CoreMapRange { @@ -34,17 +39,30 @@ pub struct CoreMapRange { // Defines accessors to match those in proc_maps. However, can't use the // proc_maps trait since is private impl CoreMapRange { - pub fn size(&self) -> usize { self.segment.p_memsz as usize } - pub fn start(&self) -> usize { self.segment.p_vaddr as usize } - pub fn filename(&self) -> Option<&Path> { self.pathname.as_deref() } - pub fn is_exec(&self) -> bool { self.segment.is_executable() } - pub fn is_write(&self) -> bool { self.segment.is_write() } - pub fn is_read(&self) -> bool { self.segment.is_read() } + pub fn size(&self) -> usize { + self.segment.p_memsz as usize + } + pub fn start(&self) -> usize { + self.segment.p_vaddr as usize + } + pub fn filename(&self) -> Option<&Path> { + self.pathname.as_deref() + } + pub fn is_exec(&self) -> bool { + self.segment.is_executable() + } + pub fn is_write(&self) -> bool { + self.segment.is_write() + } + pub fn is_read(&self) -> bool { + self.segment.is_read() + } } impl ContainsAddr for Vec { fn contains_addr(&self, addr: usize) -> bool { - self.iter().any(|map| (addr >= map.start()) && (addr < (map.start() + map.size()))) + self.iter() + .any(|map| (addr >= map.start()) && (addr < (map.start() + map.size()))) } } @@ -62,9 +80,11 @@ impl CoreDump { let mut file = File::open(filename)?; let mut contents = Vec::new(); file.read_to_end(&mut contents)?; - let elf = goblin::elf::Elf::parse(&contents)?; + let elf = goblin::elf::Elf::parse(&contents)?; - let notes = elf.iter_note_headers(&contents).ok_or_else(|| 
format_err!("no note segment found"))?; + let notes = elf + .iter_note_headers(&contents) + .ok_or_else(|| format_err!("no note segment found"))?; let mut filenames = HashMap::new(); let mut psinfo = None; @@ -72,15 +92,14 @@ impl CoreDump { for note in notes { if let Ok(note) = note { if note.n_type == goblin::elf::note::NT_PRPSINFO { - psinfo = Some(unsafe { *(note.desc.as_ptr() as * const elfcore::elf_prpsinfo) }); - } - else if note.n_type == goblin::elf::note::NT_PRSTATUS { - let thread_status = unsafe { *(note.desc.as_ptr() as * const elfcore::elf_prstatus) }; + psinfo = Some(unsafe { *(note.desc.as_ptr() as *const elfcore::elf_prpsinfo) }); + } else if note.n_type == goblin::elf::note::NT_PRSTATUS { + let thread_status = + unsafe { *(note.desc.as_ptr() as *const elfcore::elf_prstatus) }; status.push(thread_status); - } - else if note.n_type == goblin::elf::note::NT_FILE { + } else if note.n_type == goblin::elf::note::NT_FILE { let data = note.desc; - let ptrs = data.as_ptr() as * const usize; + let ptrs = data.as_ptr() as *const usize; let count = unsafe { *ptrs }; let _page_size = unsafe { *ptrs.offset(1) }; @@ -106,16 +125,33 @@ impl CoreDump { for ph in elf.program_headers { if ph.p_type == goblin::elf::program_header::PT_LOAD { let pathname = filenames.get(&(ph.p_vaddr as _)); - let map = CoreMapRange {pathname: pathname.cloned(), segment: ph}; - info!("map: {:016x}-{:016x} {}{}{} {}", map.start(), map.start() + map.size(), - if map.is_read() {'r'} else {'-'}, if map.is_write() {'w'} else {'-'}, if map.is_exec() {'x'} else {'-'}, - map.filename().unwrap_or(&std::path::PathBuf::from("")).display()); + let map = CoreMapRange { + pathname: pathname.cloned(), + segment: ph, + }; + info!( + "map: {:016x}-{:016x} {}{}{} {}", + map.start(), + map.start() + map.size(), + if map.is_read() { 'r' } else { '-' }, + if map.is_write() { 'w' } else { '-' }, + if map.is_exec() { 'x' } else { '-' }, + map.filename() + .unwrap_or(&std::path::PathBuf::from("")) + 
.display() + ); maps.push(map); } } - Ok(CoreDump{filename: filename.to_owned(), contents, maps, psinfo, status}) + Ok(CoreDump { + filename: filename.to_owned(), + contents, + maps, + psinfo, + status, + }) } } @@ -131,8 +167,8 @@ impl ProcessMemory for CoreDump { let ph = &map.segment; if start >= ph.p_vaddr && start <= (ph.p_vaddr + ph.p_memsz) { let offset = (start - ph.p_vaddr + ph.p_offset) as usize; - buf.copy_from_slice(&self.contents[offset..(offset+buf.len())]); - return Ok(()) + buf.copy_from_slice(&self.contents[offset..(offset + buf.len())]); + return Ok(()); } } @@ -155,30 +191,33 @@ impl PythonCoreDump { // Get the python binary from the maps, and parse it let (python_filename, python_binary) = { - let map = maps.iter().find(|m| m.filename().is_some() & m.is_exec()).ok_or_else(|| format_err!("Failed to get binary from coredump"))?; + let map = maps + .iter() + .find(|m| m.filename().is_some() & m.is_exec()) + .ok_or_else(|| format_err!("Failed to get binary from coredump"))?; let python_filename = map.filename().unwrap(); - let python_binary = parse_binary(python_filename, map.start() as _ , map.size() as _); + let python_binary = parse_binary(python_filename, map.start() as _, map.size() as _); info!("Found python binary @ {}", python_filename.display()); (python_filename.to_owned(), python_binary) }; // get the libpython binary (if any) from maps let libpython_binary = { - let libmap = maps.iter() - .find(|m| { - if let Some(pathname) = m.filename() { - if let Some(pathname) = pathname.to_str() { - return is_python_lib(pathname) && m.is_exec(); - } + let libmap = maps.iter().find(|m| { + if let Some(pathname) = m.filename() { + if let Some(pathname) = pathname.to_str() { + return is_python_lib(pathname) && m.is_exec(); } - false - }); + } + false + }); let mut libpython_binary: Option = None; if let Some(libpython) = libmap { if let Some(filename) = &libpython.filename() { info!("Found libpython binary @ {}", filename.display()); - let parsed = 
parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?; + let parsed = + parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?; libpython_binary = Some(parsed); } } @@ -191,10 +230,16 @@ impl PythonCoreDump { _ => python_binary.ok(), }; - let python_info = PythonProcessInfo{python_binary, libpython_binary, maps: Box::new(core.maps.clone()), - python_filename: python_filename, dockerized: false}; + let python_info = PythonProcessInfo { + python_binary, + libpython_binary, + maps: Box::new(core.maps.clone()), + python_filename: python_filename, + dockerized: false, + }; - let version = get_python_version(&python_info, &core).context("failed to get python version")?; + let version = + get_python_version(&python_info, &core).context("failed to get python version")?; info!("Got python version {}", version); let interpreter_address = get_interpreter_address(&python_info, &core, &version)?; @@ -205,37 +250,76 @@ impl PythonCoreDump { let threadstate_address = get_threadstate_address(&python_info, &version, &config)?; info!("found threadstate at 0x{:016x}", threadstate_address); - Ok(PythonCoreDump{core, version, interpreter_address, threadstate_address}) + Ok(PythonCoreDump { + core, + version, + interpreter_address, + threadstate_address, + }) } pub fn get_stack(&self, config: &Config) -> Result, Error> { if config.native { - return Err(format_err!("Native unwinding isn't yet supported with coredumps")); + return Err(format_err!( + "Native unwinding isn't yet supported with coredumps" + )); } if config.subprocesses { - return Err(format_err!("Subprocesses can't be used for getting stacktraces from coredumps")); + return Err(format_err!( + "Subprocesses can't be used for getting stacktraces from coredumps" + )); } // different versions have different layouts, check as appropriate Ok(match self.version { - Version{major: 2, minor: 3..=7, ..} => self._get_stack::(config), - Version{major: 3, minor: 3, ..} => 
self._get_stack::(config), - Version{major: 3, minor: 4..=5, ..} => self._get_stack::(config), - Version{major: 3, minor: 6, ..} => self._get_stack::(config), - Version{major: 3, minor: 7, ..} => self._get_stack::(config), - Version{major: 3, minor: 8, ..} => self._get_stack::(config), - Version{major: 3, minor: 9, ..} => self._get_stack::(config), - Version{major: 3, minor: 10, ..} => self._get_stack::(config), - Version{major: 3, minor: 11, ..} => self._get_stack::(config), - _ => Err(format_err!("Unsupported version of Python: {}", self.version)) + Version { + major: 2, + minor: 3..=7, + .. + } => self._get_stack::(config), + Version { + major: 3, minor: 3, .. + } => self._get_stack::(config), + Version { + major: 3, + minor: 4..=5, + .. + } => self._get_stack::(config), + Version { + major: 3, minor: 6, .. + } => self._get_stack::(config), + Version { + major: 3, minor: 7, .. + } => self._get_stack::(config), + Version { + major: 3, minor: 8, .. + } => self._get_stack::(config), + Version { + major: 3, minor: 9, .. + } => self._get_stack::(config), + Version { + major: 3, + minor: 10, + .. + } => self._get_stack::(config), + Version { + major: 3, + minor: 11, + .. + } => self._get_stack::(config), + _ => Err(format_err!( + "Unsupported version of Python: {}", + self.version + )), }?) 
} fn _get_stack(&self, config: &Config) -> Result, Error> { let interp: I = self.core.copy_struct(self.interpreter_address)?; - let mut traces = get_stack_traces(&interp, &self.core, self.threadstate_address, Some(config))?; + let mut traces = + get_stack_traces(&interp, &self.core, self.threadstate_address, Some(config))?; let thread_names = thread_names_from_interpreter(&interp, &self.core, &self.version).ok(); for trace in &mut traces { @@ -247,7 +331,12 @@ impl PythonCoreDump { if let Some(locals) = frame.locals.as_mut() { let max_length = (128 * config.dump_locals) as isize; for local in locals { - let repr = format_variable::(&self.core, &self.version, local.addr, max_length); + let repr = format_variable::( + &self.core, + &self.version, + local.addr, + max_length, + ); local.repr = Some(repr.unwrap_or("?".to_owned())); } } @@ -259,20 +348,24 @@ impl PythonCoreDump { pub fn print_traces(&self, traces: &Vec, config: &Config) -> Result<(), Error> { if config.dump_json { println!("{}", serde_json::to_string_pretty(&traces)?); - return Ok(()) + return Ok(()); } for status in &self.core.status { - println!("Signal {}: {}", + println!( + "Signal {}: {}", style(status.pr_cursig).bold().yellow(), - self.core.filename.display()); + self.core.filename.display() + ); break; } if let Some(psinfo) = self.core.psinfo { - println!("Process {}: {}", + println!( + "Process {}: {}", style(psinfo.pr_pid).bold().yellow(), - OsStr::from_bytes(&psinfo.pr_psargs).to_string_lossy()); + OsStr::from_bytes(&psinfo.pr_psargs).to_string_lossy() + ); } println!("Python v{}", style(&self.version).bold()); println!(""); @@ -349,12 +442,18 @@ mod test { // so we can't (yet) figure out the interpreter address & version. 
// Manually specify here to test out instead let core = CoreDump::new(&get_coredump_path("python_3_9_threads")).unwrap(); - let version = Version{major: 3, minor: 9, patch: 13, release_flags: "".to_owned()}; + let version = Version { + major: 3, + minor: 9, + patch: 13, + release_flags: "".to_owned(), + }; let python_core = PythonCoreDump { core, version, interpreter_address: 0x000055a8293dbe20, - threadstate_address: 0x000055a82745fe18}; + threadstate_address: 0x000055a82745fe18, + }; let config = Config::default(); let traces = python_core.get_stack(&config).unwrap(); diff --git a/src/cython.rs b/src/cython.rs index 1a0a9814..1b7ed352 100644 --- a/src/cython.rs +++ b/src/cython.rs @@ -1,13 +1,12 @@ - +use regex::Regex; use std; use std::collections::{BTreeMap, HashMap}; -use regex::Regex; use anyhow::Error; use lazy_static::lazy_static; -use crate::utils::resolve_filename; use crate::stack_trace::Frame; +use crate::utils::resolve_filename; pub struct SourceMaps { maps: HashMap>, @@ -16,7 +15,7 @@ pub struct SourceMaps { impl SourceMaps { pub fn new() -> SourceMaps { let maps = HashMap::new(); - SourceMaps{maps} + SourceMaps { maps } } pub fn translate(&mut self, frame: &mut Frame) { @@ -67,7 +66,7 @@ impl SourceMaps { } struct SourceMap { - lookup: BTreeMap + lookup: BTreeMap, } impl SourceMap { @@ -76,7 +75,11 @@ impl SourceMap { SourceMap::from_contents(&contents, filename, module) } - pub fn from_contents(contents: &str, cpp_filename: &str, module: &Option) -> Result { + pub fn from_contents( + contents: &str, + cpp_filename: &str, + module: &Option, + ) -> Result { lazy_static! 
{ static ref RE: Regex = Regex::new(r#"^\s*/\* "(.+\..+)":([0-9]+)"#).unwrap(); } @@ -108,7 +111,7 @@ impl SourceMap { } lookup.insert(line_count + 1, ("".to_owned(), 0)); - Ok(SourceMap{lookup}) + Ok(SourceMap { lookup }) } pub fn lookup(&self, lineno: u32) -> Option<&(String, u32)> { @@ -116,25 +119,38 @@ impl SourceMap { // handle EOF Some((_, (_, 0))) => None, Some((_, val)) => Some(val), - None => None + None => None, } } } pub fn ignore_frame(name: &str) -> bool { - let ignorable = ["__Pyx_PyFunction_FastCallDict", "__Pyx_PyObject_CallOneArg", - "__Pyx_PyObject_Call", "__Pyx_PyObject_Call", "__pyx_FusedFunction_call"]; + let ignorable = [ + "__Pyx_PyFunction_FastCallDict", + "__Pyx_PyObject_CallOneArg", + "__Pyx_PyObject_Call", + "__Pyx_PyObject_Call", + "__pyx_FusedFunction_call", + ]; ignorable.iter().any(|&f| f == name) } pub fn demangle(name: &str) -> &str { // slice off any leading cython prefix. - let prefixes = ["__pyx_fuse_1_0__pyx_pw", "__pyx_fuse_0__pyx_f", "__pyx_fuse_1__pyx_f", - "__pyx_pf", "__pyx_pw", "__pyx_f", "___pyx_f", "___pyx_pw"]; + let prefixes = [ + "__pyx_fuse_1_0__pyx_pw", + "__pyx_fuse_0__pyx_f", + "__pyx_fuse_1__pyx_f", + "__pyx_pf", + "__pyx_pw", + "__pyx_f", + "___pyx_f", + "___pyx_pw", + ]; let mut current = match prefixes.iter().find(|&prefix| name.starts_with(prefix)) { Some(prefix) => &name[prefix.len()..], - None => return name + None => return name, }; let mut next = current; @@ -166,8 +182,8 @@ pub fn demangle(name: &str) -> &str { break; } next = &next[digits + digit_index..]; - }, - Err(_) => { break } + } + Err(_) => break, }; } debug!("cython_demangle(\"{}\") -> \"{}\"", name, current); @@ -175,7 +191,11 @@ pub fn demangle(name: &str) -> &str { current } -fn resolve_cython_file(cpp_filename: &str, cython_filename: &str, module: &Option) -> String { +fn resolve_cython_file( + cpp_filename: &str, + cython_filename: &str, + module: &Option, +) -> String { let cython_path = std::path::PathBuf::from(cython_filename); if let 
Some(ext) = cython_path.extension() { let mut path_buf = std::path::PathBuf::from(cpp_filename); @@ -187,10 +207,9 @@ fn resolve_cython_file(cpp_filename: &str, cython_filename: &str, module: &Optio match module { Some(module) => { - resolve_filename(cython_filename, module) - .unwrap_or_else(|| cython_filename.to_owned()) - }, - None => cython_filename.to_owned() + resolve_filename(cython_filename, module).unwrap_or_else(|| cython_filename.to_owned()) + } + None => cython_filename.to_owned(), } } @@ -200,34 +219,58 @@ mod tests { #[test] fn test_demangle() { // all of these were wrong at certain points when writing cython_demangle =( - assert_eq!(demangle("__pyx_pf_8implicit_4_als_30_least_squares_cg"), "_least_squares_cg"); - assert_eq!(demangle("__pyx_pw_8implicit_4_als_5least_squares_cg"), "least_squares_cg"); - assert_eq!(demangle("__pyx_fuse_1_0__pyx_pw_8implicit_4_als_31_least_squares_cg"), "_least_squares_cg"); - assert_eq!(demangle("__pyx_f_6mtrand_cont0_array"), "mtrand_cont0_array"); + assert_eq!( + demangle("__pyx_pf_8implicit_4_als_30_least_squares_cg"), + "_least_squares_cg" + ); + assert_eq!( + demangle("__pyx_pw_8implicit_4_als_5least_squares_cg"), + "least_squares_cg" + ); + assert_eq!( + demangle("__pyx_fuse_1_0__pyx_pw_8implicit_4_als_31_least_squares_cg"), + "_least_squares_cg" + ); + assert_eq!( + demangle("__pyx_f_6mtrand_cont0_array"), + "mtrand_cont0_array" + ); // in both of these cases we should ideally slice off the module (_als/bpr), but it gets tricky // implementation wise - assert_eq!(demangle("__pyx_fuse_0__pyx_f_8implicit_4_als_axpy"), "_als_axpy"); - assert_eq!(demangle("__pyx_fuse_1__pyx_f_8implicit_3bpr_has_non_zero"), "bpr_has_non_zero"); + assert_eq!( + demangle("__pyx_fuse_0__pyx_f_8implicit_4_als_axpy"), + "_als_axpy" + ); + assert_eq!( + demangle("__pyx_fuse_1__pyx_f_8implicit_3bpr_has_non_zero"), + "bpr_has_non_zero" + ); } #[test] fn test_source_map() { - let map = 
SourceMap::from_contents(include_str!("../ci/testdata/cython_test.c"), "cython_test.c", &None).unwrap(); + let map = SourceMap::from_contents( + include_str!("../ci/testdata/cython_test.c"), + "cython_test.c", + &None, + ) + .unwrap(); // we don't have info on cython line numbers until line 1261 assert_eq!(map.lookup(1000), None); // past the end of the file should also return none assert_eq!(map.lookup(10000), None); - let lookup = |lineno: u32, cython_file: &str, cython_line: u32| { - match map.lookup(lineno) { - Some((file, line)) => { - assert_eq!(file, cython_file); - assert_eq!(line, &cython_line); - }, - None => { - panic!("Failed to lookup line {} (expected {}:{})", lineno, cython_file, cython_line); - } + let lookup = |lineno: u32, cython_file: &str, cython_line: u32| match map.lookup(lineno) { + Some((file, line)) => { + assert_eq!(file, cython_file); + assert_eq!(line, &cython_line); + } + None => { + panic!( + "Failed to lookup line {} (expected {}:{})", + lineno, cython_file, cython_line + ); } }; lookup(1298, "cython_test.pyx", 6); diff --git a/src/dump.rs b/src/dump.rs index 03d57a75..a62c8047 100644 --- a/src/dump.rs +++ b/src/dump.rs @@ -1,5 +1,5 @@ use anyhow::Error; -use console::{Term, style}; +use console::{style, Term}; use crate::config::Config; use crate::python_spy::PythonSpy; @@ -12,29 +12,39 @@ pub fn print_traces(pid: Pid, config: &Config, parent: Option) -> Result<() if config.dump_json { let traces = process.get_stack_traces()?; println!("{}", serde_json::to_string_pretty(&traces)?); - return Ok(()) + return Ok(()); } - println!("Process {}: {}", + println!( + "Process {}: {}", style(process.pid).bold().yellow(), - process.process.cmdline()?.join(" ")); + process.process.cmdline()?.join(" ") + ); - println!("Python v{} ({})", + println!( + "Python v{} ({})", style(&process.version).bold(), - style(process.process.exe()?).dim()); + style(process.process.exe()?).dim() + ); if let Some(parentpid) = parent { let parentprocess = 
remoteprocess::Process::new(parentpid)?; - println!("Parent Process {}: {}", + println!( + "Parent Process {}: {}", style(parentpid).bold().yellow(), - parentprocess.cmdline()?.join(" ")); + parentprocess.cmdline()?.join(" ") + ); } println!(""); let traces = process.get_stack_traces()?; for trace in traces.iter().rev() { print_trace(trace, true); if config.subprocesses { - for (childpid, parentpid) in process.process.child_processes().expect("failed to get subprocesses") { + for (childpid, parentpid) in process + .process + .child_processes() + .expect("failed to get subprocesses") + { let term = Term::stdout(); let (_, width) = term.size(); @@ -64,7 +74,12 @@ pub fn print_trace(trace: &StackTrace, include_activity: bool) { match trace.thread_name.as_ref() { Some(name) => { - println!("Thread {}{}: \"{}\"", style(thread_id).bold().yellow(), status, name); + println!( + "Thread {}{}: \"{}\"", + style(thread_id).bold().yellow(), + status, + name + ); } None => { println!("Thread {}{}", style(thread_id).bold().yellow(), status); @@ -72,11 +87,23 @@ pub fn print_trace(trace: &StackTrace, include_activity: bool) { }; for frame in &trace.frames { - let filename = match &frame.short_filename { Some(f) => &f, None => &frame.filename }; + let filename = match &frame.short_filename { + Some(f) => &f, + None => &frame.filename, + }; if frame.line != 0 { - println!(" {} ({}:{})", style(&frame.name).green(), style(&filename).cyan(), style(frame.line).dim()); + println!( + " {} ({}:{})", + style(&frame.name).green(), + style(&filename).cyan(), + style(frame.line).dim() + ); } else { - println!(" {} ({})", style(&frame.name).green(), style(&filename).cyan()); + println!( + " {} ({})", + style(&frame.name).green(), + style(&filename).cyan() + ); } if let Some(locals) = &frame.locals { diff --git a/src/flamegraph.rs b/src/flamegraph.rs index ef33c379..bd6acac7 100644 --- a/src/flamegraph.rs +++ b/src/flamegraph.rs @@ -26,10 +26,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR OTHER DEALINGS IN THE SOFTWARE. */ -use std::io::Write; use std; use std::collections::HashMap; - +use std::io::Write; use anyhow::Error; use inferno::flamegraph::{Direction, Options}; @@ -43,32 +42,47 @@ pub struct Flamegraph { impl Flamegraph { pub fn new(show_linenumbers: bool) -> Flamegraph { - Flamegraph { counts: HashMap::new(), show_linenumbers } + Flamegraph { + counts: HashMap::new(), + show_linenumbers, + } } pub fn increment(&mut self, trace: &StackTrace) -> std::io::Result<()> { // convert the frame into a single ';' delimited String - let frame = trace.frames.iter().rev().map(|frame| { - let filename = match &frame.short_filename { Some(f) => &f, None => &frame.filename }; - if self.show_linenumbers && frame.line != 0 { - format!("{} ({}:{})", frame.name, filename, frame.line) - } else if filename.len() > 0 { - format!("{} ({})", frame.name, filename) - } else { - frame.name.clone() - } - }).collect::>().join(";"); + let frame = trace + .frames + .iter() + .rev() + .map(|frame| { + let filename = match &frame.short_filename { + Some(f) => &f, + None => &frame.filename, + }; + if self.show_linenumbers && frame.line != 0 { + format!("{} ({}:{})", frame.name, filename, frame.line) + } else if filename.len() > 0 { + format!("{} ({})", frame.name, filename) + } else { + frame.name.clone() + } + }) + .collect::>() + .join(";"); // update counts for that frame *self.counts.entry(frame).or_insert(0) += 1; Ok(()) } fn get_lines(&self) -> Vec { - self.counts.iter().map(|(k, v)| format!("{} {}", k, v)).collect() + self.counts + .iter() + .map(|(k, v)| format!("{} {}", k, v)) + .collect() } pub fn write(&self, w: &mut dyn Write) -> Result<(), Error> { - let mut opts = Options::default(); + let mut opts = Options::default(); opts.direction = Direction::Inverted; opts.min_width = 0.1; opts.title = std::env::args().collect::>().join(" "); diff --git a/src/lib.rs b/src/lib.rs index da73a7ad..424e253e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,9 +29,9 @@ 
extern crate anyhow; #[macro_use] extern crate log; -pub mod config; pub mod binary_parser; -#[cfg(target_os="linux")] +pub mod config; +#[cfg(target_os = "linux")] pub mod coredump; #[cfg(unwind)] mod cython; @@ -39,10 +39,10 @@ pub mod dump; #[cfg(unwind)] mod native_stack_trace; mod python_bindings; +mod python_data_access; mod python_interpreters; -pub mod python_spy; pub mod python_process_info; -mod python_data_access; +pub mod python_spy; mod python_threading; pub mod sampler; pub mod stack_trace; @@ -50,8 +50,8 @@ pub mod timer; mod utils; mod version; -pub use python_spy::PythonSpy; pub use config::Config; -pub use stack_trace::StackTrace; -pub use stack_trace::Frame; +pub use python_spy::PythonSpy; pub use remoteprocess::Pid; +pub use stack_trace::Frame; +pub use stack_trace::StackTrace; diff --git a/src/main.rs b/src/main.rs index f355cb2e..10c3a78a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,26 +3,26 @@ extern crate anyhow; #[macro_use] extern crate log; +mod binary_parser; mod config; -#[cfg(target_os="linux")] +mod console_viewer; +#[cfg(target_os = "linux")] mod coredump; -mod dump; -mod binary_parser; #[cfg(unwind)] mod cython; +mod dump; +mod flamegraph; #[cfg(unwind)] mod native_stack_trace; mod python_bindings; +mod python_data_access; mod python_interpreters; mod python_process_info; mod python_spy; -mod python_data_access; mod python_threading; -mod stack_trace; -mod console_viewer; -mod flamegraph; -mod speedscope; mod sampler; +mod speedscope; +mod stack_trace; mod timer; mod utils; mod version; @@ -32,40 +32,40 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::Duration; -use console::style; use anyhow::Error; +use console::style; -use stack_trace::{StackTrace, Frame}; -use console_viewer::ConsoleViewer; use config::{Config, FileFormat, RecordDuration}; +use console_viewer::ConsoleViewer; +use stack_trace::{Frame, StackTrace}; -use chrono::{SecondsFormat, Local}; +use chrono::{Local, SecondsFormat}; 
#[cfg(unix)] fn permission_denied(err: &Error) -> bool { err.chain().any(|cause| { if let Some(ioerror) = cause.downcast_ref::() { ioerror.kind() == std::io::ErrorKind::PermissionDenied - } else if let Some(remoteprocess::Error::IOError(ioerror)) = cause.downcast_ref::() { + } else if let Some(remoteprocess::Error::IOError(ioerror)) = + cause.downcast_ref::() + { ioerror.kind() == std::io::ErrorKind::PermissionDenied - }else { + } else { false } }) } -fn sample_console(pid: remoteprocess::Pid, - config: &Config) -> Result<(), Error> { +fn sample_console(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> { let sampler = sampler::Sampler::new(pid, config)?; let display = match remoteprocess::Process::new(pid)?.cmdline() { Ok(cmdline) => cmdline.join(" "), - Err(_) => format!("Pid {}", pid) + Err(_) => format!("Pid {}", pid), }; - let mut console = ConsoleViewer::new(config.show_line_numbers, &display, - &sampler.version, - config)?; + let mut console = + ConsoleViewer::new(config.show_line_numbers, &display, &sampler.version, config)?; for sample in sampler { if let Some(elapsed) = sample.late { console.increment_late_sample(elapsed); @@ -122,10 +122,14 @@ impl Recorder for RawFlamegraph { fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> { let mut output: Box = match config.format { - Some(FileFormat::flamegraph) => Box::new(flamegraph::Flamegraph::new(config.show_line_numbers)), - Some(FileFormat::speedscope) => Box::new(speedscope::Stats::new(config)), - Some(FileFormat::raw) => Box::new(RawFlamegraph(flamegraph::Flamegraph::new(config.show_line_numbers))), - None => return Err(format_err!("A file format is required to record samples")) + Some(FileFormat::flamegraph) => { + Box::new(flamegraph::Flamegraph::new(config.show_line_numbers)) + } + Some(FileFormat::speedscope) => Box::new(speedscope::Stats::new(config)), + Some(FileFormat::raw) => Box::new(RawFlamegraph(flamegraph::Flamegraph::new( + config.show_line_numbers, + 
))), + None => return Err(format_err!("A file format is required to record samples")), }; let filename = match config.filename.clone() { @@ -135,18 +139,18 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> Some(FileFormat::flamegraph) => "svg", Some(FileFormat::speedscope) => "json", Some(FileFormat::raw) => "txt", - None => return Err(format_err!("A file format is required to record samples")) + None => return Err(format_err!("A file format is required to record samples")), }; let local_time = Local::now().to_rfc3339_opts(SecondsFormat::Secs, true); let name = match config.python_program.as_ref() { Some(prog) => prog[0].to_string(), - None => match config.pid.as_ref() { + None => match config.pid.as_ref() { Some(pid) => pid.to_string(), - None => String::from("unknown") - } + None => String::from("unknown"), + }, }; format!("{}-{}.{}", name, local_time, ext) - } + } }; let sampler = sampler::Sampler::new(pid, config)?; @@ -162,11 +166,17 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> let max_intervals = match &config.duration { RecordDuration::Unlimited => { - println!("{}Sampling process {} times a second. Press Control-C to exit.", lede, config.sampling_rate); + println!( + "{}Sampling process {} times a second. Press Control-C to exit.", + lede, config.sampling_rate + ); None - }, + } RecordDuration::Seconds(sec) => { - println!("{}Sampling process {} times a second for {} seconds. Press Control-C to exit.", lede, config.sampling_rate, sec); + println!( + "{}Sampling process {} times a second for {} seconds. Press Control-C to exit.", + lede, config.sampling_rate, sec + ); Some(sec * config.sampling_rate) } }; @@ -181,7 +191,9 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> // The spinner on windows doesn't look great: was replaced by a [?] character at least on // my system. 
Replace unicode spinners with just how many seconds have elapsed #[cfg(windows)] - progress.set_style(indicatif::ProgressStyle::default_spinner().template("[{elapsed}] {msg}")); + progress.set_style( + indicatif::ProgressStyle::default_spinner().template("[{elapsed}] {msg}"), + ); progress } }; @@ -243,9 +255,14 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> if config.include_thread_ids { let threadid = trace.format_threadid(); - trace.frames.push(Frame{name: format!("thread ({})", threadid), + trace.frames.push(Frame { + name: format!("thread ({})", threadid), filename: String::from(""), - module: None, short_filename: None, line: 0, locals: None}); + module: None, + short_filename: None, + line: 0, + locals: None, + }); } if let Some(process_info) = trace.process_info.as_ref().map(|x| x) { @@ -287,25 +304,34 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> } { - let mut out_file = std::fs::File::create(&filename)?; - output.write(&mut out_file)?; + let mut out_file = std::fs::File::create(&filename)?; + output.write(&mut out_file)?; } match config.format.as_ref().unwrap() { FileFormat::flamegraph => { - println!("{}Wrote flamegraph data to '{}'. Samples: {} Errors: {}", lede, filename, samples, errors); + println!( + "{}Wrote flamegraph data to '{}'. Samples: {} Errors: {}", + lede, filename, samples, errors + ); // open generated flame graph in the browser on OSX (theory being that on linux // you might be SSH'ed into a server somewhere and this isn't desired, but on // that is pretty unlikely for osx) (note to self: xdg-open will open on linux) #[cfg(target_os = "macos")] std::process::Command::new("open").arg(&filename).spawn()?; - }, - FileFormat::speedscope => { - println!("{}Wrote speedscope file to '{}'. Samples: {} Errors: {}", lede, filename, samples, errors); + } + FileFormat::speedscope => { + println!( + "{}Wrote speedscope file to '{}'. 
Samples: {} Errors: {}", + lede, filename, samples, errors + ); println!("{}Visit https://www.speedscope.app/ to view", lede); - }, + } FileFormat::raw => { - println!("{}Wrote raw flamegraph data to '{}'. Samples: {} Errors: {}", lede, filename, samples, errors); + println!( + "{}Wrote raw flamegraph data to '{}'. Samples: {} Errors: {}", + lede, filename, samples, errors + ); println!("{}You can use the flamegraph.pl script from https://github.com/brendangregg/flamegraph to generate a SVG", lede); } }; @@ -315,12 +341,12 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> fn run_spy_command(pid: remoteprocess::Pid, config: &config::Config) -> Result<(), Error> { match config.command.as_ref() { - "dump" => { + "dump" => { dump::print_traces(pid, config, None)?; - }, + } "record" => { record_samples(pid, config)?; - }, + } "top" => { sample_console(pid, config)?; } @@ -335,7 +361,7 @@ fn run_spy_command(pid: remoteprocess::Pid, config: &config::Config) -> Result<( fn pyspy_main() -> Result<(), Error> { let config = config::Config::from_commandline(); - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { if unsafe { libc::geteuid() } != 0 { eprintln!("This program requires root on OSX."); @@ -344,7 +370,7 @@ fn pyspy_main() -> Result<(), Error> { } } - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] { if let Some(ref core_filename) = config.core_filename { let core = coredump::PythonCoreDump::new(std::path::Path::new(&core_filename))?; @@ -355,9 +381,7 @@ fn pyspy_main() -> Result<(), Error> { if let Some(pid) = config.pid { run_spy_command(pid, &config)?; - } - - else if let Some(ref subprocess) = config.python_program { + } else if let Some(ref subprocess) = config.python_program { // Dump out stdout/stderr from the process to a temp file, so we can view it later if needed let mut process_output = tempfile::NamedTempFile::new()?; @@ -368,7 +392,10 @@ fn pyspy_main() -> Result<(), Error> { if unsafe { libc::geteuid() } 
== 0 { if let Ok(sudo_uid) = std::env::var("SUDO_UID") { use std::os::unix::process::CommandExt; - info!("Dropping root and running python command as {}", std::env::var("SUDO_USER")?); + info!( + "Dropping root and running python command as {}", + std::env::var("SUDO_USER")? + ); command.uid(sudo_uid.parse::()?); } } @@ -377,15 +404,17 @@ fn pyspy_main() -> Result<(), Error> { let mut command = command.args(&subprocess[1..]); if config.capture_output { - command = command.stdin(std::process::Stdio::null()) + command = command + .stdin(std::process::Stdio::null()) .stdout(process_output.reopen()?) .stderr(process_output.reopen()?) } - let mut command = command.spawn() + let mut command = command + .spawn() .map_err(|e| format_err!("Failed to create process '{}': {}", subprocess[0], e))?; - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // sleep just in case: https://jvns.ca/blog/2018/01/28/mac-freeze/ std::thread::sleep(Duration::from_millis(50)); @@ -394,10 +423,10 @@ fn pyspy_main() -> Result<(), Error> { // check exit code of subprocess std::thread::sleep(Duration::from_millis(1)); - let success = match command.try_wait()? { + let success = match command.try_wait()? 
{ Some(exit) => exit.success(), // if process hasn't finished, assume success - None => true + None => true, }; // if we failed for any reason, dump out stderr from child process here @@ -421,34 +450,37 @@ fn pyspy_main() -> Result<(), Error> { } fn main() { - env_logger::builder().format_timestamp_nanos().try_init().unwrap(); + env_logger::builder() + .format_timestamp_nanos() + .try_init() + .unwrap(); if let Err(err) = pyspy_main() { #[cfg(unix)] { - if permission_denied(&err) { - // Got a permission denied error, if we're not running as root - ask to use sudo - if unsafe { libc::geteuid() } != 0 { - eprintln!("Permission Denied: Try running again with elevated permissions by going 'sudo env \"PATH=$PATH\" !!'"); - std::process::exit(1); - } + if permission_denied(&err) { + // Got a permission denied error, if we're not running as root - ask to use sudo + if unsafe { libc::geteuid() } != 0 { + eprintln!("Permission Denied: Try running again with elevated permissions by going 'sudo env \"PATH=$PATH\" !!'"); + std::process::exit(1); + } - // We got a permission denied error running as root, check to see if we're running - // as docker, and if so ask the user to check the SYS_PTRACE capability is added - // Otherwise, fall through to the generic error handling - #[cfg(target_os="linux")] - if let Ok(cgroups) = std::fs::read_to_string("/proc/self/cgroup") { - if cgroups.contains("/docker/") { - eprintln!("Permission Denied"); - eprintln!("\nIt looks like you are running in a docker container. 
Please make sure \ + // We got a permission denied error running as root, check to see if we're running + // as docker, and if so ask the user to check the SYS_PTRACE capability is added + // Otherwise, fall through to the generic error handling + #[cfg(target_os = "linux")] + if let Ok(cgroups) = std::fs::read_to_string("/proc/self/cgroup") { + if cgroups.contains("/docker/") { + eprintln!("Permission Denied"); + eprintln!("\nIt looks like you are running in a docker container. Please make sure \ you started your container with the SYS_PTRACE capability. See \ https://github.com/benfred/py-spy#how-do-i-run-py-spy-in-docker for \ more details"); - std::process::exit(1); + std::process::exit(1); + } } } } - } eprintln!("Error: {}", err); for (i, suberror) in err.chain().enumerate() { diff --git a/src/native_stack_trace.rs b/src/native_stack_trace.rs index d4dd1981..73ac2380 100644 --- a/src/native_stack_trace.rs +++ b/src/native_stack_trace.rs @@ -1,14 +1,14 @@ -use std::collections::HashSet; use anyhow::Error; +use std::collections::HashSet; -use cpp_demangle::{DemangleOptions, BorrowedSymbol}; -use remoteprocess::{self, Pid}; +use cpp_demangle::{BorrowedSymbol, DemangleOptions}; use lazy_static::lazy_static; use lru::LruCache; +use remoteprocess::{self, Pid}; use crate::binary_parser::BinaryInfo; use crate::cython; -use crate::stack_trace::{Frame}; +use crate::stack_trace::Frame; use crate::utils::resolve_filename; pub struct NativeStack { @@ -25,22 +25,34 @@ pub struct NativeStack { } impl NativeStack { - pub fn new(pid: Pid, python: Option, libpython: Option) -> Result { + pub fn new( + pid: Pid, + python: Option, + libpython: Option, + ) -> Result { let cython_maps = cython::SourceMaps::new(); let process = remoteprocess::Process::new(pid)?; let unwinder = process.unwinder()?; let symbolicator = process.symbolicator()?; - return Ok(NativeStack{cython_maps, unwinder, symbolicator, should_reload: false, - python, - libpython, - process, - symbol_cache: 
LruCache::new(65536) - }); + return Ok(NativeStack { + cython_maps, + unwinder, + symbolicator, + should_reload: false, + python, + libpython, + process, + symbol_cache: LruCache::new(65536), + }); } - pub fn merge_native_thread(&mut self, frames: &Vec, thread: &remoteprocess::Thread) -> Result, Error> { + pub fn merge_native_thread( + &mut self, + frames: &Vec, + thread: &remoteprocess::Thread, + ) -> Result, Error> { if self.should_reload { self.symbolicator.reload()?; self.should_reload = false; @@ -52,7 +64,11 @@ impl NativeStack { // TODO: merging the two stack together could happen outside of thread lock return self.merge_native_stack(frames, native_stack); } - pub fn merge_native_stack(&mut self, frames: &Vec, native_stack: Vec) -> Result, Error> { + pub fn merge_native_stack( + &mut self, + frames: &Vec, + native_stack: Vec, + ) -> Result, Error> { let mut python_frame_index = 0; let mut merged = Vec::new(); @@ -62,16 +78,16 @@ impl NativeStack { let cached_symbol = self.symbol_cache.get(&addr).map(|f| f.clone()); // merges a remoteprocess::StackFrame into the current merged vec - let is_python_addr = self.python.as_ref().map_or(false, |m| m.contains(addr)) || - self.libpython.as_ref().map_or(false, |m| m.contains(addr)); + let is_python_addr = self.python.as_ref().map_or(false, |m| m.contains(addr)) + || self.libpython.as_ref().map_or(false, |m| m.contains(addr)); let merge_frame = &mut |frame: &remoteprocess::StackFrame| { match self.get_merge_strategy(is_python_addr, frame) { - MergeType::Ignore => {}, + MergeType::Ignore => {} MergeType::MergeNativeFrame => { if let Some(python_frame) = self.translate_native_frame(frame) { merged.push(python_frame); } - }, + } MergeType::MergePythonFrame => { // if we have a corresponding python frame for the evalframe // merge it into the stack. 
(if we're out of bounds a later @@ -96,22 +112,36 @@ impl NativeStack { let mut symbolicated_count = 0; let mut first_frame = None; - self.symbolicator.symbolicate(addr, !is_python_addr, &mut |frame: &remoteprocess::StackFrame| { - symbolicated_count += 1; - if symbolicated_count == 1 { - first_frame = Some(frame.clone()); - } - merge_frame(frame); - }).unwrap_or_else(|e| { - if let remoteprocess::Error::NoBinaryForAddress(_) = e { - debug!("don't have a binary for symbols at 0x{:x} - reloading", addr); - self.should_reload = true; - } - // if we can't symbolicate, just insert a stub here. - merged.push(Frame{filename: "?".to_owned(), - name: format!("0x{:x}", addr), - line: 0, short_filename: None, module: None, locals: None}); - }); + self.symbolicator + .symbolicate( + addr, + !is_python_addr, + &mut |frame: &remoteprocess::StackFrame| { + symbolicated_count += 1; + if symbolicated_count == 1 { + first_frame = Some(frame.clone()); + } + merge_frame(frame); + }, + ) + .unwrap_or_else(|e| { + if let remoteprocess::Error::NoBinaryForAddress(_) = e { + debug!( + "don't have a binary for symbols at 0x{:x} - reloading", + addr + ); + self.should_reload = true; + } + // if we can't symbolicate, just insert a stub here. + merged.push(Frame { + filename: "?".to_owned(), + name: format!("0x{:x}", addr), + line: 0, + short_filename: None, + module: None, + locals: None, + }); + }); if symbolicated_count == 1 { self.symbol_cache.put(addr, first_frame.unwrap()); @@ -134,11 +164,17 @@ impl NativeStack { // if we have seen exactly one more python frame in the native stack than the python stack - let it go. 
// (can happen when the python stack has been unwound, but haven't exited the PyEvalFrame function // yet) - info!("Have {} native and {} python threads in stack - allowing for now", - python_frame_index, frames.len()); + info!( + "Have {} native and {} python threads in stack - allowing for now", + python_frame_index, + frames.len() + ); } else { - return Err(format_err!("Failed to merge native and python frames (Have {} native and {} python)", - python_frame_index, frames.len())); + return Err(format_err!( + "Failed to merge native and python frames (Have {} native and {} python)", + python_frame_index, + frames.len() + )); } } @@ -150,7 +186,11 @@ impl NativeStack { Ok(merged) } - fn get_merge_strategy(&self, check_python: bool, frame: &remoteprocess::StackFrame) -> MergeType { + fn get_merge_strategy( + &self, + check_python: bool, + frame: &remoteprocess::StackFrame, + ) -> MergeType { if check_python { if let Some(ref function) = frame.function { // We want to include some internal python functions. For example, calls like time.sleep @@ -182,15 +222,15 @@ impl NativeStack { // _PyEval_EvalFrameDefault.cold.2962 let mut tokens = function.split(&['_', '.'][..]).filter(|&x| x.len() > 0); match tokens.next() { - Some("PyEval") => { - match tokens.next() { - Some("EvalFrameDefault") => MergeType::MergePythonFrame, - Some("EvalFrameEx") => MergeType::MergePythonFrame, - _ => MergeType::Ignore - } + Some("PyEval") => match tokens.next() { + Some("EvalFrameDefault") => MergeType::MergePythonFrame, + Some("EvalFrameEx") => MergeType::MergePythonFrame, + _ => MergeType::Ignore, }, - Some(prefix) if WHITELISTED_PREFIXES.contains(prefix) => MergeType::MergeNativeFrame, - _ => MergeType::Ignore + Some(prefix) if WHITELISTED_PREFIXES.contains(prefix) => { + MergeType::MergeNativeFrame + } + _ => MergeType::Ignore, } } else { // is this correct? if we don't have a function name and in python binary should ignore? 
@@ -204,7 +244,7 @@ impl NativeStack { /// translates a native frame into a optional frame. none indicates we should ignore this frame fn translate_native_frame(&self, frame: &remoteprocess::StackFrame) -> Option { match &frame.function { - Some(func) => { + Some(func) => { if ignore_frame(func, &frame.module) { return None; } @@ -214,11 +254,9 @@ impl NativeStack { // try to resolve the filename relative to the module if given let filename = match frame.filename.as_ref() { - Some(filename) => { - resolve_filename(filename, &frame.module) - .unwrap_or_else(|| filename.clone()) - }, - None => frame.module.clone() + Some(filename) => resolve_filename(filename, &frame.module) + .unwrap_or_else(|| filename.clone()), + None => frame.module.clone(), }; let mut demangled = None; @@ -235,13 +273,23 @@ impl NativeStack { return None; } let name = cython::demangle(&name).to_owned(); - Some(Frame{filename, line, name, short_filename: None, module: Some(frame.module.clone()), locals: None}) - }, - None => { - Some(Frame{filename: frame.module.clone(), - name: format!("0x{:x}", frame.addr), locals: None, - line: 0, short_filename: None, module: Some(frame.module.clone())}) + Some(Frame { + filename, + line, + name, + short_filename: None, + module: Some(frame.module.clone()), + locals: None, + }) } + None => Some(Frame { + filename: frame.module.clone(), + name: format!("0x{:x}", frame.addr), + locals: None, + line: 0, + short_filename: None, + module: Some(frame.module.clone()), + }), } } @@ -258,12 +306,12 @@ impl NativeStack { enum MergeType { Ignore, MergePythonFrame, - MergeNativeFrame + MergeNativeFrame, } // the intent here is to remove top-level libc or pthreads calls // from the stack traces. 
This almost certainly can be done better -#[cfg(target_os="linux")] +#[cfg(target_os = "linux")] fn ignore_frame(function: &str, module: &str) -> bool { if function == "__libc_start_main" && module.contains("/libc") { return true; @@ -280,7 +328,7 @@ fn ignore_frame(function: &str, module: &str) -> bool { false } -#[cfg(target_os="macos")] +#[cfg(target_os = "macos")] fn ignore_frame(function: &str, module: &str) -> bool { if function == "_start" && module.contains("/libdyld.dylib") { return true; diff --git a/src/python_bindings/mod.rs b/src/python_bindings/mod.rs index e69156a0..74a6ce89 100644 --- a/src/python_bindings/mod.rs +++ b/src/python_bindings/mod.rs @@ -1,12 +1,12 @@ pub mod v2_7_15; +pub mod v3_10_0; +pub mod v3_11_0; pub mod v3_3_7; pub mod v3_5_5; pub mod v3_6_6; pub mod v3_7_0; pub mod v3_8_0; pub mod v3_9_5; -pub mod v3_10_0; -pub mod v3_11_0; // currently the PyRuntime struct used from Python 3.7 on really can't be // exposed in a cross platform way using bindgen. PyRuntime has several mutex's @@ -23,125 +23,228 @@ pub mod pyruntime { #[cfg(target_arch = "x86")] pub fn get_interp_head_offset(version: &Version) -> usize { match version { - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" | "a2" => 16, - "a3" | "a4" => 20, - _ => 24 - } + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" | "a2" => 16, + "a3" | "a4" => 20, + _ => 24, }, - Version{major: 3, minor: 8..=10, ..} => 24, - _ => 16 + Version { + major: 3, + minor: 8..=10, + .. + } => 24, + _ => 16, } } #[cfg(target_arch = "arm")] pub fn get_interp_head_offset(version: &Version) -> usize { match version { - Version{major: 3, minor: 7, ..} => 20, - _ => 28 + Version { + major: 3, minor: 7, .. 
+ } => 20, + _ => 28, } } #[cfg(target_pointer_width = "64")] pub fn get_interp_head_offset(version: &Version) -> usize { match version { - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" | "a2" => 24, - _ => 32 - } + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" | "a2" => 24, + _ => 32, }, - Version{major: 3, minor: 8..=10, ..} => 32, - Version{major: 3, minor: 11, ..} => 40, - _ => 24 + Version { + major: 3, + minor: 8..=10, + .. + } => 32, + Version { + major: 3, + minor: 11, + .. + } => 40, + _ => 24, } } // getting gilstate.tstate_current is different for all OS // and is also different for each python version, and even // between v3.8.0a1 and v3.8.0a2 =( - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1440), - Version{major: 3, minor: 7, ..} => Some(1528), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(1432), - "a2" => Some(888), - "a3" | "a4" => Some(1448), - _ => Some(1416), - } + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1440), + Version { + major: 3, minor: 7, .. + } => Some(1528), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(1432), + "a2" => Some(888), + "a3" | "a4" => Some(1448), + _ => Some(1416), }, - Version{major: 3, minor: 8, ..} => { Some(1416) }, - Version{major: 3, minor: 9..=10, ..} => { Some(616) }, - Version{major: 3, minor: 11, ..} => Some(624), - _ => None + Version { + major: 3, minor: 8, .. + } => Some(1416), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(616), + Version { + major: 3, + minor: 11, + .. 
+ } => Some(624), + _ => None, } } - #[cfg(all(target_os="linux", target_arch="x86"))] + #[cfg(all(target_os = "linux", target_arch = "x86"))] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, ..} => Some(796), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(792), - "a2" => Some(512), - "a3" | "a4" => Some(800), - _ => Some(788) - } + Version { + major: 3, minor: 7, .. + } => Some(796), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(792), + "a2" => Some(512), + "a3" | "a4" => Some(800), + _ => Some(788), }, - Version{major: 3, minor: 8, ..} => Some(788), - Version{major: 3, minor: 9..=10, ..} => Some(352), - _ => None + Version { + major: 3, minor: 8, .. + } => Some(788), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(352), + _ => None, } } - #[cfg(all(target_os="linux", target_arch="arm"))] + #[cfg(all(target_os = "linux", target_arch = "arm"))] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, ..} => Some(828), - Version{major: 3, minor: 8, ..} => Some(804), - Version{major: 3, minor: 9..=11, ..} => Some(364), - _ => None + Version { + major: 3, minor: 7, .. + } => Some(828), + Version { + major: 3, minor: 8, .. + } => Some(804), + Version { + major: 3, + minor: 9..=11, + .. 
+ } => Some(364), + _ => None, } } - #[cfg(all(target_os="linux", target_arch="aarch64"))] + #[cfg(all(target_os = "linux", target_arch = "aarch64"))] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1408), - Version{major: 3, minor: 7, ..} => Some(1496), - Version{major: 3, minor: 8, ..} => Some(1384), - Version{major: 3, minor: 9..=10, ..} => Some(584), - Version{major: 3, minor: 11, ..} => Some(592), - _ => None + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1408), + Version { + major: 3, minor: 7, .. + } => Some(1496), + Version { + major: 3, minor: 8, .. + } => Some(1384), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(584), + Version { + major: 3, + minor: 11, + .. + } => Some(592), + _ => None, } } - #[cfg(all(target_os="linux", target_arch="x86_64"))] + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1392), - Version{major: 3, minor: 7, ..} => Some(1480), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(1384), - "a2" => Some(840), - "a3" | "a4" => Some(1400), - _ => Some(1368) - } - }, - Version{major: 3, minor: 8, ..} => Some(1368), - Version{major: 3, minor: 9..=10, ..} => Some(568), - Version{major: 3, minor: 11, ..} => Some(576), - _ => None + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1392), + Version { + major: 3, minor: 7, .. + } => Some(1480), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(1384), + "a2" => Some(840), + "a3" | "a4" => Some(1400), + _ => Some(1368), + }, + Version { + major: 3, minor: 8, .. + } => Some(1368), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(568), + Version { + major: 3, + minor: 11, + .. 
+ } => Some(576), + _ => None, } } - #[cfg(all(target_os="linux", any(target_arch="powerpc64", target_arch="powerpc", target_arch="mips")))] + #[cfg(all( + target_os = "linux", + any( + target_arch = "powerpc64", + target_arch = "powerpc", + target_arch = "mips" + ) + ))] pub fn get_tstate_current_offset(version: &Version) -> Option { None } @@ -149,39 +252,80 @@ pub mod pyruntime { #[cfg(windows)] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1320), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(1312), - "a2" => Some(768), - "a3" | "a4" => Some(1328), - _ => Some(1296) - } + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1320), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(1312), + "a2" => Some(768), + "a3" | "a4" => Some(1328), + _ => Some(1296), }, - Version{major: 3, minor: 8, ..} => Some(1296), - Version{major: 3, minor: 9..=10, ..} => Some(496), - Version{major: 3, minor: 11, ..} => Some(504), - _ => None + Version { + major: 3, minor: 8, .. + } => Some(1296), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(496), + Version { + major: 3, + minor: 11, + .. + } => Some(504), + _ => None, } } - #[cfg(target_os="freebsd")] + #[cfg(target_os = "freebsd")] pub fn get_tstate_current_offset(version: &Version) -> Option { match version { - Version{major: 3, minor: 7, patch: 0..=3, ..} => Some(1248), - Version{major: 3, minor: 7, patch: 4..=7, ..} => Some(1336), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" => Some(1240), - "a2" => Some(696), - "a3" | "a4" => Some(1256), - _ => Some(1224) - } + Version { + major: 3, + minor: 7, + patch: 0..=3, + .. + } => Some(1248), + Version { + major: 3, + minor: 7, + patch: 4..=7, + .. 
+ } => Some(1336), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match version.release_flags.as_ref() { + "a1" => Some(1240), + "a2" => Some(696), + "a3" | "a4" => Some(1256), + _ => Some(1224), }, - Version{major: 3, minor: 8, ..} => Some(1224), - Version{major: 3, minor: 9..=10, ..} => Some(424), - Version{major: 3, minor: 11, ..} => Some(432), - _ => None + Version { + major: 3, minor: 8, .. + } => Some(1224), + Version { + major: 3, + minor: 9..=10, + .. + } => Some(424), + Version { + major: 3, + minor: 11, + .. + } => Some(432), + _ => None, } } } diff --git a/src/python_data_access.rs b/src/python_data_access.rs index b9ea7d83..6ea29d1d 100644 --- a/src/python_data_access.rs +++ b/src/python_data_access.rs @@ -2,15 +2,23 @@ use std; use anyhow::Error; -use remoteprocess::ProcessMemory; -use crate::python_interpreters::{StringObject, BytesObject, InterpreterState, Object, TypeObject, TupleObject, ListObject}; +use crate::python_interpreters::{ + BytesObject, InterpreterState, ListObject, Object, StringObject, TupleObject, TypeObject, +}; use crate::version::Version; +use remoteprocess::ProcessMemory; /// Copies a string from a target process. 
Attempts to handle unicode differences, which mostly seems to be working -pub fn copy_string(ptr: * const T, process: &P) -> Result { +pub fn copy_string( + ptr: *const T, + process: &P, +) -> Result { let obj = process.copy_pointer(ptr)?; if obj.size() >= 4096 { - return Err(format_err!("Refusing to copy {} chars of a string", obj.size())); + return Err(format_err!( + "Refusing to copy {} chars of a string", + obj.size() + )); } let kind = obj.kind(); @@ -20,23 +28,28 @@ pub fn copy_string(ptr: * const T, process: & match (kind, obj.ascii()) { (4, _) => { #[allow(clippy::cast_ptr_alignment)] - let chars = unsafe { std::slice::from_raw_parts(bytes.as_ptr() as * const char, bytes.len() / 4) }; + let chars = unsafe { + std::slice::from_raw_parts(bytes.as_ptr() as *const char, bytes.len() / 4) + }; Ok(chars.iter().collect()) - }, + } (2, _) => { // UCS2 strings aren't used internally after v3.3: https://www.python.org/dev/peps/pep-0393/ // TODO: however with python 2.7 they could be added with --enable-unicode=ucs2 configure flag. 
// or with python 3.2 --with-wide-unicode=ucs2 Err(format_err!("ucs2 strings aren't supported yet!")) - }, + } (1, true) => Ok(String::from_utf8(bytes)?), - (1, false) => Ok(bytes.iter().map(|&b| { b as char }).collect()), - _ => Err(format_err!("Unknown string kind {}", kind)) + (1, false) => Ok(bytes.iter().map(|&b| b as char).collect()), + _ => Err(format_err!("Unknown string kind {}", kind)), } } /// Copies data from a PyBytesObject (currently only lnotab object) -pub fn copy_bytes(ptr: * const T, process: &P) -> Result, Error> { +pub fn copy_bytes( + ptr: *const T, + process: &P, +) -> Result, Error> { let obj = process.copy_pointer(ptr)?; let size = obj.size(); if size >= 65536 { @@ -49,7 +62,8 @@ pub fn copy_bytes(ptr: * const T, process: &P) pub fn copy_long(process: &P, addr: usize) -> Result<(i64, bool), Error> { // this is PyLongObject for a specific version of python, but this works since it's binary compatible // layout across versions we're targeting - let value = process.copy_pointer(addr as *const crate::python_bindings::v3_7_0::PyLongObject)?; + let value = + process.copy_pointer(addr as *const crate::python_bindings::v3_7_0::PyLongObject)?; let negative: i64 = if value.ob_base.ob_size < 0 { -1 } else { 1 }; let size = value.ob_base.ob_size * (negative as isize); match size { @@ -75,13 +89,14 @@ pub fn copy_long(process: &P, addr: usize) -> Result<(i64, boo Ok((negative * ret, false)) } // we don't support arbitrary sized integers yet, signal this by returning that we've overflowed - _ => Ok((value.ob_base.ob_size as i64, true)) + _ => Ok((value.ob_base.ob_size as i64, true)), } } /// Copies a i64 from a python 2.7 PyIntObject pub fn copy_int(process: &P, addr: usize) -> Result { - let value = process.copy_pointer(addr as *const crate::python_bindings::v2_7_15::PyIntObject)?; + let value = + process.copy_pointer(addr as *const crate::python_bindings::v2_7_15::PyIntObject)?; Ok(value.ob_ival as i64) } @@ -93,20 +108,36 @@ pub struct DictIterator<'a, 
P: 'a> { kind: u8, index: usize, entries: usize, - values: usize + values: usize, } impl<'a, P: ProcessMemory> DictIterator<'a, P> { - pub fn from_managed_dict(process: &'a P, version: &'a Version, addr: usize, tp_addr: usize) -> Result, Error> { + pub fn from_managed_dict( + process: &'a P, + version: &'a Version, + addr: usize, + tp_addr: usize, + ) -> Result, Error> { // Handles logic of _PyObject_ManagedDictPointer in python 3.11 let values_addr: usize = process.copy_struct(addr - 4 * std::mem::size_of::())?; let dict_addr: usize = process.copy_struct(addr - 3 * std::mem::size_of::())?; if values_addr != 0 { - let ht: crate::python_bindings::v3_11_0::PyHeapTypeObject = process.copy_struct(tp_addr)?; - let keys: crate::python_bindings::v3_11_0::PyDictKeysObject = process.copy_struct(ht.ht_cached_keys as usize)?; - let entries_addr = ht.ht_cached_keys as usize + (1 << keys.dk_log2_index_bytes) + std::mem::size_of_val(&keys); - Ok(DictIterator{process, entries_addr, index: 0, kind: keys.dk_kind, entries: keys.dk_nentries as usize, values: values_addr}) + let ht: crate::python_bindings::v3_11_0::PyHeapTypeObject = + process.copy_struct(tp_addr)?; + let keys: crate::python_bindings::v3_11_0::PyDictKeysObject = + process.copy_struct(ht.ht_cached_keys as usize)?; + let entries_addr = ht.ht_cached_keys as usize + + (1 << keys.dk_log2_index_bytes) + + std::mem::size_of_val(&keys); + Ok(DictIterator { + process, + entries_addr, + index: 0, + kind: keys.dk_kind, + entries: keys.dk_nentries as usize, + values: values_addr, + }) } else if dict_addr != 0 { DictIterator::from(process, version, dict_addr) } else { @@ -114,16 +145,35 @@ impl<'a, P: ProcessMemory> DictIterator<'a, P> { } } - pub fn from(process: &'a P, version: &'a Version, addr: usize) -> Result, Error> { - match version { - Version{major: 3, minor: 11, ..} => { - let dict: crate::python_bindings::v3_11_0::PyDictObject = process.copy_struct(addr)?; + pub fn from( + process: &'a P, + version: &'a Version, + 
addr: usize, + ) -> Result, Error> { + match version { + Version { + major: 3, + minor: 11, + .. + } => { + let dict: crate::python_bindings::v3_11_0::PyDictObject = + process.copy_struct(addr)?; let keys = process.copy_pointer(dict.ma_keys)?; - let entries_addr = dict.ma_keys as usize + (1 << keys.dk_log2_index_bytes) + std::mem::size_of_val(&keys); - Ok(DictIterator{process, entries_addr, index: 0, kind: keys.dk_kind, entries: keys.dk_nentries as usize, values: dict.ma_values as usize}) - }, - _ => { - let dict: crate::python_bindings::v3_7_0::PyDictObject = process.copy_struct(addr)?; + let entries_addr = dict.ma_keys as usize + + (1 << keys.dk_log2_index_bytes) + + std::mem::size_of_val(&keys); + Ok(DictIterator { + process, + entries_addr, + index: 0, + kind: keys.dk_kind, + entries: keys.dk_nentries as usize, + values: dict.ma_values as usize, + }) + } + _ => { + let dict: crate::python_bindings::v3_7_0::PyDictObject = + process.copy_struct(addr)?; // Getting this going generically is tricky: there is a lot of variation on how dictionaries are handled // instead this just focuses on a single version, which works for python // 3.6/3.7/3.8/3.9/3.10 @@ -136,11 +186,19 @@ impl<'a, P: ProcessMemory> DictIterator<'a, P> { #[cfg(target_pointer_width = "64")] _ => 8, #[cfg(not(target_pointer_width = "64"))] - _ => 4 + _ => 4, }; let byteoffset = (keys.dk_size * index_size) as usize; - let entries_addr = dict.ma_keys as usize + byteoffset + std::mem::size_of_val(&keys); - Ok(DictIterator{process, entries_addr, index: 0, kind: 0, entries: keys.dk_nentries as usize, values: dict.ma_values as usize}) + let entries_addr = + dict.ma_keys as usize + byteoffset + std::mem::size_of_val(&keys); + Ok(DictIterator { + process, + entries_addr, + index: 0, + kind: 0, + entries: keys.dk_nentries as usize, + values: dict.ma_values as usize, + }) } } } @@ -157,14 +215,23 @@ impl<'a, P: ProcessMemory> Iterator for DictIterator<'a, P> { // get the addresses of the key/value for the 
current index let entry = match self.kind { 0 => { - let addr = index * std::mem::size_of::() + self.entries_addr; - let ret = self.process.copy_struct::(addr); + let addr = index + * std::mem::size_of::() + + self.entries_addr; + let ret = self + .process + .copy_struct::(addr); ret.map(|entry| (entry.me_key as usize, entry.me_value as usize)) - }, + } _ => { // Python 3.11 added a PyDictUnicodeEntry , which uses the hash from the Unicode key rather than recalculate - let addr = index * std::mem::size_of::() + self.entries_addr; - let ret = self.process.copy_struct::(addr); + let addr = index + * std::mem::size_of::( + ) + + self.entries_addr; + let ret = self + .process + .copy_struct::(addr); ret.map(|entry| (entry.me_key as usize, entry.me_value as usize)) } }; @@ -176,20 +243,23 @@ impl<'a, P: ProcessMemory> Iterator for DictIterator<'a, P> { } let value = if self.values != 0 { - let valueaddr = self.values + index * std::mem::size_of::<* mut crate::python_bindings::v3_7_0::PyObject>(); + let valueaddr = self.values + + index + * std::mem::size_of::<*mut crate::python_bindings::v3_7_0::PyObject>( + ); match self.process.copy_struct(valueaddr) { Ok(addr) => addr, - Err(e) => { return Some(Err(e.into())); } + Err(e) => { + return Some(Err(e.into())); + } } } else { value }; - return Some(Ok((key, value))) - }, - Err(e) => { - return Some(Err(e.into())) + return Some(Ok((key, value))); } + Err(e) => return Some(Err(e.into())), } } @@ -197,18 +267,26 @@ impl<'a, P: ProcessMemory> Iterator for DictIterator<'a, P> { } } -pub const PY_TPFLAGS_MANAGED_DICT: usize = 1 << 4; -const PY_TPFLAGS_INT_SUBCLASS: usize = 1 << 23; -const PY_TPFLAGS_LONG_SUBCLASS: usize = 1 << 24; -const PY_TPFLAGS_LIST_SUBCLASS: usize = 1 << 25; -const PY_TPFLAGS_TUPLE_SUBCLASS: usize = 1 << 26; -const PY_TPFLAGS_BYTES_SUBCLASS: usize = 1 << 27; +pub const PY_TPFLAGS_MANAGED_DICT: usize = 1 << 4; +const PY_TPFLAGS_INT_SUBCLASS: usize = 1 << 23; +const PY_TPFLAGS_LONG_SUBCLASS: usize = 1 << 24; 
+const PY_TPFLAGS_LIST_SUBCLASS: usize = 1 << 25; +const PY_TPFLAGS_TUPLE_SUBCLASS: usize = 1 << 26; +const PY_TPFLAGS_BYTES_SUBCLASS: usize = 1 << 27; const PY_TPFLAGS_STRING_SUBCLASS: usize = 1 << 28; -const PY_TPFLAGS_DICT_SUBCLASS: usize = 1 << 29; +const PY_TPFLAGS_DICT_SUBCLASS: usize = 1 << 29; /// Converts a python variable in the other process to a human readable string -pub fn format_variable(process: &P, version: &Version, addr: usize, max_length: isize) - -> Result where I: InterpreterState, P: ProcessMemory { +pub fn format_variable( + process: &P, + version: &Version, + addr: usize, + max_length: isize, +) -> Result +where + I: InterpreterState, + P: ProcessMemory, +{ // We need at least 5 characters remaining for all this code to work, replace with an ellipsis if // we're out of space if max_length <= 5 { @@ -221,7 +299,10 @@ pub fn format_variable(process: &P, version: &Version, addr: usize, max_le // get the typename (truncating to 128 bytes if longer) let max_type_len = 128; let value_type_name = process.copy(value_type.name() as usize, max_type_len)?; - let length = value_type_name.iter().position(|&x| x == 0).unwrap_or(max_type_len); + let length = value_type_name + .iter() + .position(|&x| x == 0) + .unwrap_or(max_type_len); let value_type_name = std::str::from_utf8(&value_type_name[..length])?; let format_int = |value: i64| { @@ -239,14 +320,21 @@ pub fn format_variable(process: &P, version: &Version, addr: usize, max_le } else if flags & PY_TPFLAGS_LONG_SUBCLASS != 0 { // we don't handle arbitrary sized integer values (max is 2**60) let (value, overflowed) = copy_long(process, addr)?; - if overflowed { - if value > 0 { "+bigint".to_owned() } else { "-bigint".to_owned() } + if overflowed { + if value > 0 { + "+bigint".to_owned() + } else { + "-bigint".to_owned() + } } else { format_int(value) } - } else if flags & PY_TPFLAGS_STRING_SUBCLASS != 0 || - (version.major == 2 && (flags & PY_TPFLAGS_BYTES_SUBCLASS != 0)) { - let value = 
copy_string(addr as *const I::StringObject, process)?.replace("\"", "\\\"").replace("\n", "\\n"); + } else if flags & PY_TPFLAGS_STRING_SUBCLASS != 0 + || (version.major == 2 && (flags & PY_TPFLAGS_BYTES_SUBCLASS != 0)) + { + let value = copy_string(addr as *const I::StringObject, process)? + .replace("\"", "\\\"") + .replace("\n", "\\n"); if value.len() as isize >= max_length - 5 { format!("\"{}...\"", &value[..(max_length - 5) as usize]) } else { @@ -278,7 +366,8 @@ pub fn format_variable(process: &P, version: &Version, addr: usize, max_le let mut values = Vec::new(); let mut remaining = max_length - 2; for i in 0..object.size() { - let valueptr: *mut I::Object = process.copy_struct(addr + i * std::mem::size_of::<* mut I::Object>())?; + let valueptr: *mut I::Object = + process.copy_struct(addr + i * std::mem::size_of::<*mut I::Object>())?; let value = format_variable::(process, version, valueptr as usize, remaining)?; remaining -= value.len() as isize + 2; if remaining <= 5 { @@ -304,7 +393,8 @@ pub fn format_variable(process: &P, version: &Version, addr: usize, max_le } format!("({})", values.join(", ")) } else if value_type_name == "float" { - let value = process.copy_pointer(addr as *const crate::python_bindings::v3_7_0::PyFloatObject)?; + let value = + process.copy_pointer(addr as *const crate::python_bindings::v3_7_0::PyFloatObject)?; format!("{}", value.ob_fval) } else if value_type_name == "NoneType" { "None".to_owned() @@ -320,8 +410,10 @@ pub mod tests { // the idea here is to create various cpython interpretator structs locally // and then test out that the above code handles appropriately use super::*; + use crate::python_bindings::v3_7_0::{ + PyASCIIObject, PyBytesObject, PyUnicodeObject, PyVarObject, + }; use remoteprocess::LocalProcess; - use crate::python_bindings::v3_7_0::{PyBytesObject, PyVarObject, PyUnicodeObject, PyASCIIObject}; use std::ptr::copy_nonoverlapping; // python stores data after pybytesobject/pyasciiobject. 
hack by initializing a 4k buffer for testing. @@ -329,30 +421,51 @@ pub mod tests { #[allow(dead_code)] pub struct AllocatedPyByteObject { pub base: PyBytesObject, - pub storage: [u8; 4096] + pub storage: [u8; 4096], } #[allow(dead_code)] pub struct AllocatedPyASCIIObject { pub base: PyASCIIObject, - pub storage: [u8; 4096] + pub storage: [u8; 4096], } pub fn to_byteobject(bytes: &[u8]) -> AllocatedPyByteObject { let ob_size = bytes.len() as isize; - let base = PyBytesObject{ob_base: PyVarObject{ob_size, ..Default::default()}, ..Default::default()}; - let mut ret = AllocatedPyByteObject{base, storage: [0 as u8; 4096]}; - unsafe { copy_nonoverlapping(bytes.as_ptr(), ret.base.ob_sval.as_mut_ptr() as *mut u8, bytes.len()); } + let base = PyBytesObject { + ob_base: PyVarObject { + ob_size, + ..Default::default() + }, + ..Default::default() + }; + let mut ret = AllocatedPyByteObject { + base, + storage: [0 as u8; 4096], + }; + unsafe { + copy_nonoverlapping( + bytes.as_ptr(), + ret.base.ob_sval.as_mut_ptr() as *mut u8, + bytes.len(), + ); + } ret } pub fn to_asciiobject(input: &str) -> AllocatedPyASCIIObject { let bytes: Vec = input.bytes().collect(); - let mut base = PyASCIIObject{length: bytes.len() as isize, ..Default::default()}; + let mut base = PyASCIIObject { + length: bytes.len() as isize, + ..Default::default() + }; base.state.set_compact(1); base.state.set_kind(1); base.state.set_ascii(1); - let mut ret = AllocatedPyASCIIObject{base, storage: [0 as u8; 4096]}; + let mut ret = AllocatedPyASCIIObject { + base, + storage: [0 as u8; 4096], + }; unsafe { let ptr = &mut ret as *mut AllocatedPyASCIIObject as *mut u8; let dst = ptr.offset(std::mem::size_of::() as isize); @@ -366,7 +479,7 @@ pub mod tests { let original = "function_name"; let obj = to_asciiobject(original); - let unicode: &PyUnicodeObject = unsafe{ std::mem::transmute(&obj.base) }; + let unicode: &PyUnicodeObject = unsafe { std::mem::transmute(&obj.base) }; let copied = copy_string(unicode, 
&LocalProcess).unwrap(); assert_eq!(copied, original); } diff --git a/src/python_interpreters.rs b/src/python_interpreters.rs index 9efc79bf..111a2eed 100644 --- a/src/python_interpreters.rs +++ b/src/python_interpreters.rs @@ -9,7 +9,9 @@ This means we can't dereference them directly. // these bindings are automatically generated by rust bindgen // using the generate_bindings.py script -use crate::python_bindings::{v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{ + v2_7_15, v3_10_0, v3_11_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, +}; use std; @@ -19,7 +21,7 @@ pub trait InterpreterState { type StringObject: StringObject; type ListObject: ListObject; type TupleObject: TupleObject; - fn head(&self) -> * mut Self::ThreadState; + fn head(&self) -> *mut Self::ThreadState; fn modules(&self) -> *mut Self::Object; } @@ -27,24 +29,24 @@ pub trait ThreadState { type FrameObject: FrameObject; type InterpreterState: InterpreterState; - fn interp(&self) -> * mut Self::InterpreterState; + fn interp(&self) -> *mut Self::InterpreterState; // starting in python 3.11, there is an extra level of indirection // in getting the frame. 
this returns the address fn frame_address(&self) -> Option; - fn frame(&self, offset: Option) -> * mut Self::FrameObject; + fn frame(&self, offset: Option) -> *mut Self::FrameObject; fn thread_id(&self) -> u64; fn native_thread_id(&self) -> Option; - fn next(&self) -> * mut Self; + fn next(&self) -> *mut Self; } pub trait FrameObject { type CodeObject: CodeObject; - fn code(&self) -> * mut Self::CodeObject; + fn code(&self) -> *mut Self::CodeObject; fn lasti(&self) -> i32; - fn back(&self) -> * mut Self; + fn back(&self) -> *mut Self; } pub trait CodeObject { @@ -52,13 +54,13 @@ pub trait CodeObject { type BytesObject: BytesObject; type TupleObject: TupleObject; - fn name(&self) -> * mut Self::StringObject; - fn filename(&self) -> * mut Self::StringObject; - fn line_table(&self) -> * mut Self::BytesObject; + fn name(&self) -> *mut Self::StringObject; + fn filename(&self) -> *mut Self::StringObject; + fn line_table(&self) -> *mut Self::BytesObject; fn first_lineno(&self) -> i32; fn nlocals(&self) -> i32; fn argcount(&self) -> i32; - fn varnames(&self) -> * mut Self::TupleObject; + fn varnames(&self) -> *mut Self::TupleObject; fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32; } @@ -88,7 +90,7 @@ pub trait ListObject { pub trait Object { type TypeObject: TypeObject; - fn ob_type(&self) -> * mut Self::TypeObject; + fn ob_type(&self) -> *mut Self::TypeObject; } pub trait TypeObject { @@ -105,64 +107,108 @@ fn offset_of(object: *const T, member: *const M) -> usize { /// (this code is identical across python versions, we are only abstracting the struct layouts here). /// String handling changes substantially between python versions, and is handled separately. macro_rules! 
PythonCommonImpl { - ($py: ident, $stringobject: ident) => ( + ($py: ident, $stringobject: ident) => { impl InterpreterState for $py::PyInterpreterState { type ThreadState = $py::PyThreadState; type Object = $py::PyObject; type StringObject = $py::$stringobject; type ListObject = $py::PyListObject; type TupleObject = $py::PyTupleObject; - fn head(&self) -> * mut Self::ThreadState { self.tstate_head } - fn modules(&self) -> * mut Self::Object { self.modules } + fn head(&self) -> *mut Self::ThreadState { + self.tstate_head + } + fn modules(&self) -> *mut Self::Object { + self.modules + } } impl ThreadState for $py::PyThreadState { type FrameObject = $py::PyFrameObject; type InterpreterState = $py::PyInterpreterState; - fn frame_address(&self) -> Option { None } - fn frame(&self, _: Option) -> * mut Self::FrameObject { self.frame } - fn thread_id(&self) -> u64 { self.thread_id as u64 } - fn native_thread_id(&self) -> Option { None } - fn next(&self) -> * mut Self { self.next } - fn interp(&self) -> *mut Self::InterpreterState { self.interp } + fn frame_address(&self) -> Option { + None + } + fn frame(&self, _: Option) -> *mut Self::FrameObject { + self.frame + } + fn thread_id(&self) -> u64 { + self.thread_id as u64 + } + fn native_thread_id(&self) -> Option { + None + } + fn next(&self) -> *mut Self { + self.next + } + fn interp(&self) -> *mut Self::InterpreterState { + self.interp + } } impl FrameObject for $py::PyFrameObject { type CodeObject = $py::PyCodeObject; - fn code(&self) -> * mut Self::CodeObject { self.f_code } - fn lasti(&self) -> i32 { self.f_lasti as i32 } - fn back(&self) -> * mut Self { self.f_back } + fn code(&self) -> *mut Self::CodeObject { + self.f_code + } + fn lasti(&self) -> i32 { + self.f_lasti as i32 + } + fn back(&self) -> *mut Self { + self.f_back + } } impl Object for $py::PyObject { type TypeObject = $py::PyTypeObject; - fn ob_type(&self) -> * mut Self::TypeObject { self.ob_type as * mut Self::TypeObject } + fn ob_type(&self) -> *mut 
Self::TypeObject { + self.ob_type as *mut Self::TypeObject + } } impl TypeObject for $py::PyTypeObject { - fn name(&self) -> *const ::std::os::raw::c_char { self.tp_name } - fn dictoffset(&self) -> isize { self.tp_dictoffset } - fn flags(&self) -> usize { self.tp_flags as usize } + fn name(&self) -> *const ::std::os::raw::c_char { + self.tp_name + } + fn dictoffset(&self) -> isize { + self.tp_dictoffset + } + fn flags(&self) -> usize { + self.tp_flags as usize + } } - ) + }; } // We can use this up until python3.10 - where code object lnotab attribute is deprecated macro_rules! PythonCodeObjectImpl { - ($py: ident, $bytesobject: ident, $stringobject: ident) => ( + ($py: ident, $bytesobject: ident, $stringobject: ident) => { impl CodeObject for $py::PyCodeObject { type BytesObject = $py::$bytesobject; type StringObject = $py::$stringobject; type TupleObject = $py::PyTupleObject; - fn name(&self) -> * mut Self::StringObject { self.co_name as * mut Self::StringObject } - fn filename(&self) -> * mut Self::StringObject { self.co_filename as * mut Self::StringObject } - fn line_table(&self) -> * mut Self::BytesObject { self.co_lnotab as * mut Self::BytesObject } - fn first_lineno(&self) -> i32 { self.co_firstlineno } - fn nlocals(&self) -> i32 { self.co_nlocals } - fn argcount(&self) -> i32 { self.co_argcount } - fn varnames(&self) -> * mut Self::TupleObject { self.co_varnames as * mut Self::TupleObject } + fn name(&self) -> *mut Self::StringObject { + self.co_name as *mut Self::StringObject + } + fn filename(&self) -> *mut Self::StringObject { + self.co_filename as *mut Self::StringObject + } + fn line_table(&self) -> *mut Self::BytesObject { + self.co_lnotab as *mut Self::BytesObject + } + fn first_lineno(&self) -> i32 { + self.co_firstlineno + } + fn nlocals(&self) -> i32 { + self.co_nlocals + } + fn argcount(&self) -> i32 { + self.co_argcount + } + fn varnames(&self) -> *mut Self::TupleObject { + self.co_varnames as *mut Self::TupleObject + } fn 
get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { let lasti = lasti as i32; @@ -191,27 +237,35 @@ macro_rules! PythonCodeObjectImpl { line_number } } - ) + }; } // String/Byte/List/Tuple handling for Python 3.3+ macro_rules! Python3Impl { - ($py: ident) => ( + ($py: ident) => { impl BytesObject for $py::PyBytesObject { - fn size(&self) -> usize { self.ob_base.ob_size as usize } + fn size(&self) -> usize { + self.ob_base.ob_size as usize + } fn address(&self, base: usize) -> usize { base + offset_of(self, &self.ob_sval) } } impl StringObject for $py::PyUnicodeObject { - fn ascii(&self) -> bool { self._base._base.state.ascii() != 0 } - fn size(&self) -> usize { self._base._base.length as usize } - fn kind(&self) -> u32 { self._base._base.state.kind() } + fn ascii(&self) -> bool { + self._base._base.state.ascii() != 0 + } + fn size(&self) -> usize { + self._base._base.length as usize + } + fn kind(&self) -> u32 { + self._base._base.state.kind() + } fn address(&self, base: usize) -> usize { if self._base._base.state.compact() == 0 { - return unsafe{ self.data.any as usize }; + return unsafe { self.data.any as usize }; } if self._base._base.state.ascii() == 1 { @@ -224,17 +278,24 @@ macro_rules! 
Python3Impl { impl ListObject for $py::PyListObject { type Object = $py::PyObject; - fn size(&self) -> usize { self.ob_base.ob_size as usize } - fn item(&self) -> *mut *mut Self::Object { self.ob_item } + fn size(&self) -> usize { + self.ob_base.ob_size as usize + } + fn item(&self) -> *mut *mut Self::Object { + self.ob_item + } } impl TupleObject for $py::PyTupleObject { - fn size(&self) -> usize { self.ob_base.ob_size as usize } + fn size(&self) -> usize { + self.ob_base.ob_size as usize + } fn address(&self, base: usize, index: usize) -> usize { - base + offset_of(self, &self.ob_item) + index * std::mem::size_of::<* mut $py::PyObject>() + base + offset_of(self, &self.ob_item) + + index * std::mem::size_of::<*mut $py::PyObject>() } } - ) + }; } // Python 3.11 // Python3.11 is sufficiently different from previous versions that we can't use the macros above @@ -247,8 +308,12 @@ impl InterpreterState for v3_11_0::PyInterpreterState { type StringObject = v3_11_0::PyUnicodeObject; type ListObject = v3_11_0::PyListObject; type TupleObject = v3_11_0::PyTupleObject; - fn head(&self) -> * mut Self::ThreadState { self.threads.head } - fn modules(&self) -> * mut Self::Object { self.modules } + fn head(&self) -> *mut Self::ThreadState { + self.threads.head + } + fn modules(&self) -> *mut Self::Object { + self.modules + } } impl ThreadState for v3_11_0::PyThreadState { @@ -260,35 +325,57 @@ impl ThreadState for v3_11_0::PyThreadState { let current_frame_offset = offset_of(&cframe, &cframe.current_frame); Some(self.cframe as usize + current_frame_offset) } - fn frame(&self, addr: Option) -> * mut Self::FrameObject { addr.unwrap() as * mut Self::FrameObject } - fn thread_id(&self) -> u64 { self.thread_id as u64 } - fn native_thread_id(&self) -> Option { Some(self.native_thread_id as u64) } - fn next(&self) -> * mut Self { self.next } - fn interp(&self) -> *mut Self::InterpreterState { self.interp } + fn frame(&self, addr: Option) -> *mut Self::FrameObject { + addr.unwrap() as 
*mut Self::FrameObject + } + fn thread_id(&self) -> u64 { + self.thread_id as u64 + } + fn native_thread_id(&self) -> Option { + Some(self.native_thread_id as u64) + } + fn next(&self) -> *mut Self { + self.next + } + fn interp(&self) -> *mut Self::InterpreterState { + self.interp + } } impl FrameObject for v3_11_0::_PyInterpreterFrame { type CodeObject = v3_11_0::PyCodeObject; - fn code(&self) -> * mut Self::CodeObject { self.f_code } + fn code(&self) -> *mut Self::CodeObject { + self.f_code + } fn lasti(&self) -> i32 { // this returns the delta from the co_code, but we need to adjust for the // offset from co_code.co_code_adaptive. This is slightly easier to do in the // get_line_number code, so will adjust there - let co_code = self.f_code as * const _ as * const u8; - unsafe { (self.prev_instr as * const u8).offset_from(co_code) as i32} + let co_code = self.f_code as *const _ as *const u8; + unsafe { (self.prev_instr as *const u8).offset_from(co_code) as i32 } + } + fn back(&self) -> *mut Self { + self.previous } - fn back(&self) -> * mut Self { self.previous } } impl Object for v3_11_0::PyObject { type TypeObject = v3_11_0::PyTypeObject; - fn ob_type(&self) -> * mut Self::TypeObject { self.ob_type as * mut Self::TypeObject } + fn ob_type(&self) -> *mut Self::TypeObject { + self.ob_type as *mut Self::TypeObject + } } impl TypeObject for v3_11_0::PyTypeObject { - fn name(&self) -> *const ::std::os::raw::c_char { self.tp_name } - fn dictoffset(&self) -> isize { self.tp_dictoffset } - fn flags(&self) -> usize { self.tp_flags as usize } + fn name(&self) -> *const ::std::os::raw::c_char { + self.tp_name + } + fn dictoffset(&self) -> isize { + self.tp_dictoffset + } + fn flags(&self) -> usize { + self.tp_flags as usize + } } fn read_varint(index: &mut usize, table: &[u8]) -> usize { @@ -321,13 +408,27 @@ impl CodeObject for v3_11_0::PyCodeObject { type StringObject = v3_11_0::PyUnicodeObject; type TupleObject = v3_11_0::PyTupleObject; - fn name(&self) -> * mut 
Self::StringObject { self.co_name as * mut Self::StringObject } - fn filename(&self) -> * mut Self::StringObject { self.co_filename as * mut Self::StringObject } - fn line_table(&self) -> * mut Self::BytesObject { self.co_linetable as * mut Self::BytesObject } - fn first_lineno(&self) -> i32 { self.co_firstlineno } - fn nlocals(&self) -> i32 { self.co_nlocals } - fn argcount(&self) -> i32 { self.co_argcount } - fn varnames(&self) -> * mut Self::TupleObject { self.co_localsplusnames as * mut Self::TupleObject } + fn name(&self) -> *mut Self::StringObject { + self.co_name as *mut Self::StringObject + } + fn filename(&self) -> *mut Self::StringObject { + self.co_filename as *mut Self::StringObject + } + fn line_table(&self) -> *mut Self::BytesObject { + self.co_linetable as *mut Self::BytesObject + } + fn first_lineno(&self) -> i32 { + self.co_firstlineno + } + fn nlocals(&self) -> i32 { + self.co_nlocals + } + fn argcount(&self) -> i32 { + self.co_argcount + } + fn varnames(&self) -> *mut Self::TupleObject { + self.co_localsplusnames as *mut Self::TupleObject + } fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { // unpack compressed table format from python 3.11 @@ -348,21 +449,19 @@ impl CodeObject for v3_11_0::PyCodeObject { bytecode_address += delta * 2; let code = (byte >> 3) & 15; let line_delta = match code { - 15 => { 0 }, + 15 => 0, 14 => { let delta = read_signed_varint(&mut index, table); read_varint(&mut index, table); // end line read_varint(&mut index, table); // start column read_varint(&mut index, table); // end column delta - }, - 13 => { - read_signed_varint(&mut index, table) - }, + } + 13 => read_signed_varint(&mut index, table), 10..=12 => { index += 2; // start column / end column (code - 10).into() - }, + } _ => { index += 1; // column 0 @@ -377,7 +476,6 @@ impl CodeObject for v3_11_0::PyCodeObject { } } - // Python 3.10 Python3Impl!(v3_10_0); PythonCommonImpl!(v3_10_0, PyUnicodeObject); @@ -387,47 +485,59 @@ impl CodeObject for 
v3_10_0::PyCodeObject { type StringObject = v3_10_0::PyUnicodeObject; type TupleObject = v3_10_0::PyTupleObject; - fn name(&self) -> * mut Self::StringObject { self.co_name as * mut Self::StringObject } - fn filename(&self) -> * mut Self::StringObject { self.co_filename as * mut Self::StringObject } - fn line_table(&self) -> * mut Self::BytesObject { self.co_linetable as * mut Self::BytesObject } - fn first_lineno(&self) -> i32 { self.co_firstlineno } - fn nlocals(&self) -> i32 { self.co_nlocals } - fn argcount(&self) -> i32 { self.co_argcount } - fn varnames(&self) -> * mut Self::TupleObject { self.co_varnames as * mut Self::TupleObject } - fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { - // in Python 3.10 we need to double the lasti instruction value here (and no I don't know why) - // https://github.com/python/cpython/blob/7b88f63e1dd4006b1a08b9c9f087dd13449ecc76/Python/ceval.c#L5999 - // Whereas in python versions up to 3.9 we didn't. - // https://github.com/python/cpython/blob/3.9/Python/ceval.c#L4713-L4714 - let lasti = 2 * lasti as i32; - - // unpack the line table. 
format is specified here: - // https://github.com/python/cpython/blob/3.10/Objects/lnotab_notes.txt - let size = table.len(); - let mut i = 0; - let mut line_number: i32 = self.first_lineno(); - let mut bytecode_address: i32 = 0; - while (i + 1) < size { - let delta: u8 = table[i]; - let line_delta: i8 = unsafe { std::mem::transmute(table[i + 1]) }; - i += 2; - - if line_delta == -128 { - continue; - } - - line_number += i32::from(line_delta); - bytecode_address += i32::from(delta); - if bytecode_address > lasti { - break; - } - } - - line_number + fn name(&self) -> *mut Self::StringObject { + self.co_name as *mut Self::StringObject } -} + fn filename(&self) -> *mut Self::StringObject { + self.co_filename as *mut Self::StringObject + } + fn line_table(&self) -> *mut Self::BytesObject { + self.co_linetable as *mut Self::BytesObject + } + fn first_lineno(&self) -> i32 { + self.co_firstlineno + } + fn nlocals(&self) -> i32 { + self.co_nlocals + } + fn argcount(&self) -> i32 { + self.co_argcount + } + fn varnames(&self) -> *mut Self::TupleObject { + self.co_varnames as *mut Self::TupleObject + } + fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { + // in Python 3.10 we need to double the lasti instruction value here (and no I don't know why) + // https://github.com/python/cpython/blob/7b88f63e1dd4006b1a08b9c9f087dd13449ecc76/Python/ceval.c#L5999 + // Whereas in python versions up to 3.9 we didn't. + // https://github.com/python/cpython/blob/3.9/Python/ceval.c#L4713-L4714 + let lasti = 2 * lasti as i32; + + // unpack the line table. 
format is specified here: + // https://github.com/python/cpython/blob/3.10/Objects/lnotab_notes.txt + let size = table.len(); + let mut i = 0; + let mut line_number: i32 = self.first_lineno(); + let mut bytecode_address: i32 = 0; + while (i + 1) < size { + let delta: u8 = table[i]; + let line_delta: i8 = unsafe { std::mem::transmute(table[i + 1]) }; + i += 2; + + if line_delta == -128 { + continue; + } + line_number += i32::from(line_delta); + bytecode_address += i32::from(delta); + if bytecode_address > lasti { + break; + } + } + line_number + } +} // Python 3.9 PythonCommonImpl!(v3_9_5, PyUnicodeObject); @@ -463,27 +573,46 @@ Python3Impl!(v3_3_7); PythonCommonImpl!(v2_7_15, PyStringObject); PythonCodeObjectImpl!(v2_7_15, PyStringObject, PyStringObject); impl BytesObject for v2_7_15::PyStringObject { - fn size(&self) -> usize { self.ob_size as usize } - fn address(&self, base: usize) -> usize { base + offset_of(self, &self.ob_sval) } + fn size(&self) -> usize { + self.ob_size as usize + } + fn address(&self, base: usize) -> usize { + base + offset_of(self, &self.ob_sval) + } } impl StringObject for v2_7_15::PyStringObject { - fn ascii(&self) -> bool { true } - fn kind(&self) -> u32 { 1 } - fn size(&self) -> usize { self.ob_size as usize } - fn address(&self, base: usize) -> usize { base + offset_of(self, &self.ob_sval) } + fn ascii(&self) -> bool { + true + } + fn kind(&self) -> u32 { + 1 + } + fn size(&self) -> usize { + self.ob_size as usize + } + fn address(&self, base: usize) -> usize { + base + offset_of(self, &self.ob_sval) + } } impl ListObject for v2_7_15::PyListObject { type Object = v2_7_15::PyObject; - fn size(&self) -> usize { self.ob_size as usize } - fn item(&self) -> *mut *mut Self::Object { self.ob_item } + fn size(&self) -> usize { + self.ob_size as usize + } + fn item(&self) -> *mut *mut Self::Object { + self.ob_item + } } impl TupleObject for v2_7_15::PyTupleObject { - fn size(&self) -> usize { self.ob_size as usize } + fn size(&self) -> usize { 
+ self.ob_size as usize + } fn address(&self, base: usize, index: usize) -> usize { - base + offset_of(self, &self.ob_item) + index * std::mem::size_of::<* mut v2_7_15::PyObject>() + base + offset_of(self, &self.ob_item) + + index * std::mem::size_of::<*mut v2_7_15::PyObject>() } } @@ -494,11 +623,15 @@ mod tests { #[test] fn test_py3_11_line_numbers() { use crate::python_bindings::v3_11_0::PyCodeObject; - let code = PyCodeObject {co_firstlineno:4, ..Default::default()}; - - let table = [128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, - 208, 4, 22, 208, 4, 22]; + let code = PyCodeObject { + co_firstlineno: 4, + ..Default::default() + }; + + let table = [ + 128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4, + 22, 208, 4, 22, + ]; assert_eq!(code.get_line_number(214, &table), 5); - } } diff --git a/src/python_process_info.rs b/src/python_process_info.rs index 76805c5f..11b217d8 100644 --- a/src/python_process_info.rs +++ b/src/python_process_info.rs @@ -1,26 +1,27 @@ +use regex::Regex; +#[cfg(windows)] +use regex::RegexBuilder; use std; #[cfg(windows)] use std::collections::HashMap; use std::mem::size_of; -use std::slice; use std::path::Path; -use regex::Regex; -#[cfg(windows)] -use regex::RegexBuilder; +use std::slice; -use anyhow::{Error, Result, Context}; +use anyhow::{Context, Error, Result}; use lazy_static::lazy_static; -use remoteprocess::{ProcessMemory, Pid}; use proc_maps::{get_process_maps, MapRange}; +use remoteprocess::{Pid, ProcessMemory}; use crate::binary_parser::{parse_binary, BinaryInfo}; use crate::config::Config; -use crate::python_bindings::{pyruntime, v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{ + pyruntime, v2_7_15, v3_10_0, v3_11_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, +}; use crate::python_interpreters::{InterpreterState, ThreadState}; use crate::stack_trace::get_stack_traces; use crate::version::Version; - 
/// Holds information about the python process: memory map layout, parsed binary info /// for python /libpython etc. pub struct PythonProcessInfo { @@ -30,13 +31,14 @@ pub struct PythonProcessInfo { pub libpython_binary: Option, pub maps: Box, pub python_filename: std::path::PathBuf, - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] pub dockerized: bool, } impl PythonProcessInfo { pub fn new(process: &remoteprocess::Process) -> Result { - let filename = process.exe() + let filename = process + .exe() .context("Failed to get process executable name. Check that the process is running.")?; #[cfg(windows)] @@ -52,23 +54,30 @@ impl PythonProcessInfo { let maps = get_process_maps(process.pid)?; info!("Got virtual memory maps from pid {}:", process.pid); for map in &maps { - debug!("map: {:016x}-{:016x} {}{}{} {}", map.start(), map.start() + map.size(), - if map.is_read() {'r'} else {'-'}, if map.is_write() {'w'} else {'-'}, if map.is_exec() {'x'} else {'-'}, - map.filename().unwrap_or(&std::path::PathBuf::from("")).display()); + debug!( + "map: {:016x}-{:016x} {}{}{} {}", + map.start(), + map.start() + map.size(), + if map.is_read() { 'r' } else { '-' }, + if map.is_write() { 'w' } else { '-' }, + if map.is_exec() { 'x' } else { '-' }, + map.filename() + .unwrap_or(&std::path::PathBuf::from("")) + .display() + ); } // parse the main python binary let (python_binary, python_filename) = { // Get the memory address for the executable by matching against virtual memory maps - let map = maps.iter() - .find(|m| { - if let Some(pathname) = m.filename() { - if let Some(pathname) = pathname.to_str() { - return is_python_bin(pathname) && m.is_exec(); - } + let map = maps.iter().find(|m| { + if let Some(pathname) = m.filename() { + if let Some(pathname) = pathname.to_str() { + return is_python_bin(pathname) && m.is_exec(); } - false - }); + } + false + }); let map = match map { Some(map) => map, @@ -77,16 +86,18 @@ impl PythonProcessInfo { // If we failed to find the 
executable in the virtual memory maps, just take the first file we find // sometimes on windows get_process_exe returns stale info =( https://github.com/benfred/py-spy/issues/40 // and on all operating systems I've tried, the exe is the first region in the maps - &maps.first().ok_or_else(|| format_err!("Failed to get virtual memory maps from process"))? + &maps.first().ok_or_else(|| { + format_err!("Failed to get virtual memory maps from process") + })? } }; - #[cfg(not(target_os="linux"))] + #[cfg(not(target_os = "linux"))] let filename = std::path::PathBuf::from(filename); // use filename through /proc/pid/exe which works across docker namespaces and // handles if the file was deleted - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let filename = &std::path::PathBuf::from(format!("/proc/{}/exe", process.pid)); // TODO: consistent types? u64 -> usize? for map.start etc @@ -97,7 +108,10 @@ impl PythonProcessInfo { #[cfg(windows)] { get_windows_python_symbols(process.pid, &filename, map.start() as u64) - .map(|symbols| { pb.symbols.extend(symbols); pb }) + .map(|symbols| { + pb.symbols.extend(symbols); + pb + }) .map_err(|err| err.into()) } @@ -124,15 +138,14 @@ impl PythonProcessInfo { // likewise handle libpython for python versions compiled with --enabled-shared let libpython_binary = { - let libmap = maps.iter() - .find(|m| { - if let Some(pathname) = m.filename() { - if let Some(pathname) = pathname.to_str() { - return is_python_lib(pathname) && m.is_exec(); - } + let libmap = maps.iter().find(|m| { + if let Some(pathname) = m.filename() { + if let Some(pathname) = pathname.to_str() { + return is_python_lib(pathname) && m.is_exec(); } - false - }); + } + false + }); let mut libpython_binary: Option = None; if let Some(libpython) = libmap { @@ -140,13 +153,22 @@ impl PythonProcessInfo { info!("Found libpython binary @ {}", filename.display()); // on linux the process could be running in docker, access the filename through procfs - 
#[cfg(target_os="linux")] - let filename = &std::path::PathBuf::from(format!("/proc/{}/root{}", process.pid, filename.display())); + #[cfg(target_os = "linux")] + let filename = &std::path::PathBuf::from(format!( + "/proc/{}/root{}", + process.pid, + filename.display() + )); #[allow(unused_mut)] - let mut parsed = parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?; + let mut parsed = + parse_binary(filename, libpython.start() as u64, libpython.size() as u64)?; #[cfg(windows)] - parsed.symbols.extend(get_windows_python_symbols(process.pid, filename, libpython.start() as u64)?); + parsed.symbols.extend(get_windows_python_symbols( + process.pid, + filename, + libpython.start() as u64, + )?); libpython_binary = Some(parsed); } } @@ -161,26 +183,36 @@ impl PythonProcessInfo { let dyld_infos = get_dyld_info(process.pid)?; for dyld in &dyld_infos { - let segname = unsafe { std::ffi::CStr::from_ptr(dyld.segment.segname.as_ptr()) }; - debug!("dyld: {:016x}-{:016x} {:10} {}", - dyld.segment.vmaddr, dyld.segment.vmaddr + dyld.segment.vmsize, - segname.to_string_lossy(), dyld.filename.display()); + let segname = + unsafe { std::ffi::CStr::from_ptr(dyld.segment.segname.as_ptr()) }; + debug!( + "dyld: {:016x}-{:016x} {:10} {}", + dyld.segment.vmaddr, + dyld.segment.vmaddr + dyld.segment.vmsize, + segname.to_string_lossy(), + dyld.filename.display() + ); } - let python_dyld_data = dyld_infos.iter() - .find(|m| { - if let Some(filename) = m.filename.to_str() { - return is_python_framework(filename) && - m.segment.segname[0..7] == [95, 95, 68, 65, 84, 65, 0]; - } - false - }); - + let python_dyld_data = dyld_infos.iter().find(|m| { + if let Some(filename) = m.filename.to_str() { + return is_python_framework(filename) + && m.segment.segname[0..7] == [95, 95, 68, 65, 84, 65, 0]; + } + false + }); if let Some(libpython) = python_dyld_data { - info!("Found libpython binary from dyld @ {}", libpython.filename.display()); + info!( + "Found libpython binary from 
dyld @ {}", + libpython.filename.display() + ); - let mut binary = parse_binary(&libpython.filename, libpython.segment.vmaddr, libpython.segment.vmsize)?; + let mut binary = parse_binary( + &libpython.filename, + libpython.segment.vmaddr, + libpython.segment.vmsize, + )?; // TODO: bss addr offsets returned from parsing binary are wrong // (assumes data section isn't split from text section like done here). @@ -202,12 +234,16 @@ impl PythonProcessInfo { _ => python_binary.ok(), }; - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let dockerized = is_dockerized(process.pid).unwrap_or(false); - Ok(PythonProcessInfo{python_binary, libpython_binary, maps: Box::new(maps), python_filename, - #[cfg(target_os="linux")] - dockerized + Ok(PythonProcessInfo { + python_binary, + libpython_binary, + maps: Box::new(maps), + python_filename, + #[cfg(target_os = "linux")] + dockerized, }) } @@ -221,7 +257,10 @@ impl PythonProcessInfo { if let Some(ref binary) = self.libpython_binary { if let Some(addr) = binary.symbols.get(symbol) { - info!("got symbol {} (0x{:016x}) from libpython binary", symbol, addr); + info!( + "got symbol {} (0x{:016x}) from libpython binary", + symbol, addr + ); return Some(addr); } } @@ -231,7 +270,9 @@ impl PythonProcessInfo { /// Returns the version of python running in the process. pub fn get_python_version

(python_info: &PythonProcessInfo, process: &P) -> Result - where P: ProcessMemory { +where + P: ProcessMemory, +{ // If possible, grab the sys.version string from the processes memory (mac osx). if let Some(&addr) = python_info.get_symbol("Py_GetVersion.version") { info!("Getting version from symbol address"); @@ -245,68 +286,97 @@ pub fn get_python_version

(python_info: &PythonProcessInfo, process: &P) -> Re // otherwise get version info from scanning BSS section for sys.version string if let Some(ref pb) = python_info.python_binary { info!("Getting version from python binary BSS"); - let bss = process.copy(pb.bss_addr as usize, - pb.bss_size as usize)?; + let bss = process.copy(pb.bss_addr as usize, pb.bss_size as usize)?; match Version::scan_bytes(&bss) { Ok(version) => return Ok(version), - Err(err) => info!("Failed to get version from BSS section: {}", err) + Err(err) => info!("Failed to get version from BSS section: {}", err), } } // try again if there is a libpython.so if let Some(ref libpython) = python_info.libpython_binary { info!("Getting version from libpython BSS"); - let bss = process.copy(libpython.bss_addr as usize, - libpython.bss_size as usize)?; + let bss = process.copy(libpython.bss_addr as usize, libpython.bss_size as usize)?; match Version::scan_bytes(&bss) { Ok(version) => return Ok(version), - Err(err) => info!("Failed to get version from libpython BSS section: {}", err) + Err(err) => info!("Failed to get version from libpython BSS section: {}", err), } } // the python_filename might have the version encoded in it (/usr/bin/python3.5 etc). 
// try reading that in (will miss patch level on python, but that shouldn't matter) - info!("Trying to get version from path: {}", python_info.python_filename.display()); + info!( + "Trying to get version from path: {}", + python_info.python_filename.display() + ); let path = Path::new(&python_info.python_filename); if let Some(python) = path.file_name() { if let Some(python) = python.to_str() { if python.starts_with("python") { let tokens: Vec<&str> = python[6..].split('.').collect(); if tokens.len() >= 2 { - if let (Ok(major), Ok(minor)) = (tokens[0].parse::(), tokens[1].parse::()) { - return Ok(Version{major, minor, patch:0, release_flags: "".to_owned()}) + if let (Ok(major), Ok(minor)) = + (tokens[0].parse::(), tokens[1].parse::()) + { + return Ok(Version { + major, + minor, + patch: 0, + release_flags: "".to_owned(), + }); } } } } } - Err(format_err!("Failed to find python version from target process")) + Err(format_err!( + "Failed to find python version from target process" + )) } -pub fn get_interpreter_address

(python_info: &PythonProcessInfo, - process: &P, - version: &Version) -> Result - where P: ProcessMemory { +pub fn get_interpreter_address

( + python_info: &PythonProcessInfo, + process: &P, + version: &Version, +) -> Result +where + P: ProcessMemory, +{ // get the address of the main PyInterpreterState object from loaded symbols if we can // (this tends to be faster than scanning through the bss section) match version { - Version{major: 3, minor: 7..=11, ..} => { + Version { + major: 3, + minor: 7..=11, + .. + } => { if let Some(&addr) = python_info.get_symbol("_PyRuntime") { - let addr = process.copy_struct(addr as usize + pyruntime::get_interp_head_offset(&version))?; + let addr = process + .copy_struct(addr as usize + pyruntime::get_interp_head_offset(&version))?; // Make sure the interpreter addr is valid before returning match check_interpreter_addresses(&[addr], &*python_info.maps, process, version) { Ok(addr) => return Ok(addr), - Err(_) => { warn!("Interpreter address from _PyRuntime symbol is invalid {:016x}", addr); } + Err(_) => { + warn!( + "Interpreter address from _PyRuntime symbol is invalid {:016x}", + addr + ); + } }; } - }, + } _ => { if let Some(&addr) = python_info.get_symbol("interp_head") { let addr = process.copy_struct(addr as usize)?; match check_interpreter_addresses(&[addr], &*python_info.maps, process, version) { Ok(addr) => return Ok(addr), - Err(_) => { warn!("Interpreter address from interp_head symbol is invalid {:016x}", addr); } + Err(_) => { + warn!( + "Interpreter address from interp_head symbol is invalid {:016x}", + addr + ); + } }; } } @@ -314,59 +384,70 @@ pub fn get_interpreter_address

(python_info: &PythonProcessInfo, info!("Failed to get interp_head from symbols, scanning BSS section from main binary"); // try scanning the BSS section of the binary for things that might be the interpreterstate - let err = - if let Some(ref pb) = python_info.python_binary { - match get_interpreter_address_from_binary(pb, &*python_info.maps, process, version) { - Ok(addr) => return Ok(addr), - err => Some(err) - } - } else { - None - }; + let err = if let Some(ref pb) = python_info.python_binary { + match get_interpreter_address_from_binary(pb, &*python_info.maps, process, version) { + Ok(addr) => return Ok(addr), + err => Some(err), + } + } else { + None + }; // Before giving up, try again if there is a libpython.so if let Some(ref lpb) = python_info.libpython_binary { info!("Failed to get interpreter from binary BSS, scanning libpython BSS"); match get_interpreter_address_from_binary(lpb, &*python_info.maps, process, version) { Ok(addr) => return Ok(addr), - lib_err => err.unwrap_or(lib_err) + lib_err => err.unwrap_or(lib_err), } } else { err.expect("Both python and libpython are invalid.") } } -fn get_interpreter_address_from_binary

(binary: &BinaryInfo, - maps: &dyn ContainsAddr, - process: &P, - version: &Version) -> Result where P: ProcessMemory { +fn get_interpreter_address_from_binary

( + binary: &BinaryInfo, + maps: &dyn ContainsAddr, + process: &P, + version: &Version, +) -> Result +where + P: ProcessMemory, +{ // We're going to scan the BSS/data section for things, and try to narrowly scan things that // look like pointers to PyinterpreterState let bss = process.copy(binary.bss_addr as usize, binary.bss_size as usize)?; #[allow(clippy::cast_ptr_alignment)] - let addrs = unsafe { slice::from_raw_parts(bss.as_ptr() as *const usize, bss.len() / size_of::()) }; + let addrs = unsafe { + slice::from_raw_parts(bss.as_ptr() as *const usize, bss.len() / size_of::()) + }; check_interpreter_addresses(addrs, maps, process, version) } // Checks whether a block of memory (from BSS/.data etc) contains pointers that are pointing // to a valid PyInterpreterState -fn check_interpreter_addresses

(addrs: &[usize], - maps: &dyn ContainsAddr, - process: &P, - version: &Version) -> Result - where P: ProcessMemory { +fn check_interpreter_addresses

( + addrs: &[usize], + maps: &dyn ContainsAddr, + process: &P, + version: &Version, +) -> Result +where + P: ProcessMemory, +{ // This function does all the work, but needs a type of the interpreter - fn check(addrs: &[usize], - maps: &dyn ContainsAddr, - process: &P) -> Result - where I: InterpreterState, P: ProcessMemory { + fn check(addrs: &[usize], maps: &dyn ContainsAddr, process: &P) -> Result + where + I: InterpreterState, + P: ProcessMemory, + { for &addr in addrs { if maps.contains_addr(addr) { // this address points to valid memory. try loading it up as a PyInterpreterState // to further check let interp: I = match process.copy_struct(addr) { Ok(interp) => interp, - Err(_) => continue + Err(_) => continue, }; // get the pythreadstate pointer from the interpreter object, and if it is also @@ -377,99 +458,150 @@ fn check_interpreter_addresses

(addrs: &[usize], // this is almost certainly the address of the intrepreter let thread = match process.copy_pointer(threads) { Ok(thread) => thread, - Err(_) => continue + Err(_) => continue, }; // as a final sanity check, try getting the stack_traces, and only return if this works - if thread.interp() as usize == addr && get_stack_traces(&interp, process, 0, None).is_ok() { + if thread.interp() as usize == addr + && get_stack_traces(&interp, process, 0, None).is_ok() + { return Ok(addr); } } } } - Err(format_err!("Failed to find a python interpreter in the .data section")) + Err(format_err!( + "Failed to find a python interpreter in the .data section" + )) } // different versions have different layouts, check as appropriate match version { - Version{major: 2, minor: 3..=7, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 3, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 4..=5, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 6, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 7, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 8, patch: 0, ..} => { - match version.release_flags.as_ref() { - "a1" | "a2" | "a3" => check::(addrs, maps, process), - _ => check::(addrs, maps, process) - } + Version { + major: 2, + minor: 3..=7, + .. + } => check::(addrs, maps, process), + Version { + major: 3, minor: 3, .. + } => check::(addrs, maps, process), + Version { + major: 3, + minor: 4..=5, + .. + } => check::(addrs, maps, process), + Version { + major: 3, minor: 6, .. + } => check::(addrs, maps, process), + Version { + major: 3, minor: 7, .. + } => check::(addrs, maps, process), + Version { + major: 3, + minor: 8, + patch: 0, + .. 
+ } => match version.release_flags.as_ref() { + "a1" | "a2" | "a3" => check::(addrs, maps, process), + _ => check::(addrs, maps, process), }, - Version{major: 3, minor: 8, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 9, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 10, ..} => check::(addrs, maps, process), - Version{major: 3, minor: 11, ..} => check::(addrs, maps, process), - _ => Err(format_err!("Unsupported version of Python: {}", version)) + Version { + major: 3, minor: 8, .. + } => check::(addrs, maps, process), + Version { + major: 3, minor: 9, .. + } => check::(addrs, maps, process), + Version { + major: 3, + minor: 10, + .. + } => check::(addrs, maps, process), + Version { + major: 3, + minor: 11, + .. + } => check::(addrs, maps, process), + _ => Err(format_err!("Unsupported version of Python: {}", version)), } } -pub fn get_threadstate_address(python_info: &PythonProcessInfo, - version: &Version, - config: &Config) -> Result { +pub fn get_threadstate_address( + python_info: &PythonProcessInfo, + version: &Version, + config: &Config, +) -> Result { let threadstate_address = match version { - Version{major: 3, minor: 7..=11, ..} => { - match python_info.get_symbol("_PyRuntime") { - Some(&addr) => { - if let Some(offset) = pyruntime::get_tstate_current_offset(&version) { - info!("Found _PyRuntime @ 0x{:016x}, getting gilstate.tstate_current from offset 0x{:x}", + Version { + major: 3, + minor: 7..=11, + .. 
+ } => match python_info.get_symbol("_PyRuntime") { + Some(&addr) => { + if let Some(offset) = pyruntime::get_tstate_current_offset(&version) { + info!("Found _PyRuntime @ 0x{:016x}, getting gilstate.tstate_current from offset 0x{:x}", addr, offset); - addr as usize + offset - } else { - error_if_gil(config, &version, "unknown pyruntime.gilstate.tstate_current offset")?; - 0 - } - }, - None => { - error_if_gil(config, &version, "failed to find _PyRuntime symbol")?; + addr as usize + offset + } else { + error_if_gil( + config, + &version, + "unknown pyruntime.gilstate.tstate_current offset", + )?; 0 } } - }, - _ => { - match python_info.get_symbol("_PyThreadState_Current") { - Some(&addr) => { - info!("Found _PyThreadState_Current @ 0x{:016x}", addr); - addr as usize - }, - None => { - error_if_gil(config, &version, "failed to find _PyThreadState_Current symbol")?; - 0 - } + None => { + error_if_gil(config, &version, "failed to find _PyRuntime symbol")?; + 0 + } + }, + _ => match python_info.get_symbol("_PyThreadState_Current") { + Some(&addr) => { + info!("Found _PyThreadState_Current @ 0x{:016x}", addr); + addr as usize + } + None => { + error_if_gil( + config, + &version, + "failed to find _PyThreadState_Current symbol", + )?; + 0 } - } - }; + }, + }; Ok(threadstate_address) } fn error_if_gil(config: &Config, version: &Version, msg: &str) -> Result<(), Error> { lazy_static! 
{ - static ref WARNED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false); + static ref WARNED: std::sync::atomic::AtomicBool = + std::sync::atomic::AtomicBool::new(false); } if config.gil_only { if !WARNED.load(std::sync::atomic::Ordering::Relaxed) { // only print this once - eprintln!("Cannot detect GIL holding in version '{}' on the current platform (reason: {})", version, msg); + eprintln!( + "Cannot detect GIL holding in version '{}' on the current platform (reason: {})", + version, msg + ); eprintln!("Please open an issue in https://github.com/benfred/py-spy with the Python version and your platform."); WARNED.store(true, std::sync::atomic::Ordering::Relaxed); } - Err(format_err!("Cannot detect GIL holding in version '{}' on the current platform (reason: {})", version, msg)) + Err(format_err!( + "Cannot detect GIL holding in version '{}' on the current platform (reason: {})", + version, + msg + )) } else { warn!("Unable to detect GIL usage: {}", msg); Ok(()) } } -pub trait ContainsAddr{ +pub trait ContainsAddr { fn contains_addr(&self, addr: usize) -> bool; } @@ -487,7 +619,7 @@ impl ContainsAddr for Vec { } } -#[cfg(target_os="linux")] +#[cfg(target_os = "linux")] fn is_dockerized(pid: Pid) -> Result { let self_mnt = std::fs::read_link("/proc/self/ns/mnt")?; let target_mnt = std::fs::read_link(&format!("/proc/{}/ns/mnt", pid))?; @@ -498,7 +630,11 @@ fn is_dockerized(pid: Pid) -> Result { // So use the win32 api to load up the couple of symbols we need on windows. 
Note: // we still can get export's from the PE file #[cfg(windows)] -pub fn get_windows_python_symbols(pid: Pid, filename: &Path, offset: u64) -> std::io::Result> { +pub fn get_windows_python_symbols( + pid: Pid, + filename: &Path, + offset: u64, +) -> std::io::Result> { use proc_maps::win_maps::SymbolLoader; let handler = SymbolLoader::new(pid)?; @@ -513,7 +649,11 @@ pub fn get_windows_python_symbols(pid: Pid, filename: &Path, offset: u64) -> std if let Ok((base, addr)) = handler.address_from_name(symbol) { // If we have a module base (ie from PDB), need to adjust by the offset // otherwise seems like we can take address directly - let addr = if base == 0 { addr } else { offset + addr - base }; + let addr = if base == 0 { + addr + } else { + offset + addr - base + }; ret.insert(String::from(*symbol), addr); } } @@ -521,7 +661,7 @@ pub fn get_windows_python_symbols(pid: Pid, filename: &Path, offset: u64) -> std Ok(ret) } -#[cfg(any(target_os="linux", target_os="freebsd"))] +#[cfg(any(target_os = "linux", target_os = "freebsd"))] pub fn is_python_lib(pathname: &str) -> bool { lazy_static! { static ref RE: Regex = Regex::new(r"/libpython\d.\d\d?(m|d|u)?.so").unwrap(); @@ -529,7 +669,7 @@ pub fn is_python_lib(pathname: &str) -> bool { RE.is_match(pathname) } -#[cfg(target_os="macos")] +#[cfg(target_os = "macos")] pub fn is_python_lib(pathname: &str) -> bool { lazy_static! { static ref RE: Regex = Regex::new(r"/libpython\d.\d\d?(m|d|u)?.(dylib|so)$").unwrap(); @@ -540,22 +680,24 @@ pub fn is_python_lib(pathname: &str) -> bool { #[cfg(windows)] pub fn is_python_lib(pathname: &str) -> bool { lazy_static! 
{ - static ref RE: Regex = RegexBuilder::new(r"\\python\d\d\d?(m|d|u)?.dll$").case_insensitive(true).build().unwrap(); + static ref RE: Regex = RegexBuilder::new(r"\\python\d\d\d?(m|d|u)?.dll$") + .case_insensitive(true) + .build() + .unwrap(); } RE.is_match(pathname) } -#[cfg(target_os="macos")] +#[cfg(target_os = "macos")] pub fn is_python_framework(pathname: &str) -> bool { - pathname.ends_with("/Python") && - !pathname.contains("Python.app") + pathname.ends_with("/Python") && !pathname.contains("Python.app") } #[cfg(test)] mod tests { use super::*; - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] #[test] fn test_is_python_lib() { assert!(is_python_lib("~/Anaconda2/lib/libpython2.7.dylib")); @@ -573,7 +715,7 @@ mod tests { assert!(!is_python_lib("/lib/heapq.cpython-36m-darwin.dylib")); } - #[cfg(any(target_os="linux", target_os="freebsd"))] + #[cfg(any(target_os = "linux", target_os = "freebsd"))] #[test] fn test_is_python_lib() { // libpython bundled by pyinstaller https://github.com/benfred/py-spy/issues/42 @@ -587,37 +729,49 @@ mod tests { // don't blindly match libraries with python in the name (boost_python etc) assert!(!is_python_lib("/usr/lib/libboost_python.so")); - assert!(!is_python_lib("/usr/lib/x86_64-linux-gnu/libboost_python-py27.so.1.58.0")); + assert!(!is_python_lib( + "/usr/lib/x86_64-linux-gnu/libboost_python-py27.so.1.58.0" + )); assert!(!is_python_lib("/usr/lib/libboost_python-py35.so")); - } #[cfg(windows)] #[test] fn test_is_python_lib() { - assert!(is_python_lib("C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.dll")); + assert!(is_python_lib( + "C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.dll" + )); // .NET host via https://github.com/pythonnet/pythonnet - assert!(is_python_lib("C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.DLL")); + assert!(is_python_lib( + "C:\\Users\\test\\AppData\\Local\\Programs\\Python\\Python37\\python37.DLL" + )); } - - 
#[cfg(target_os="macos")] + #[cfg(target_os = "macos")] #[test] fn test_python_frameworks() { // homebrew v2 assert!(!is_python_framework("/usr/local/Cellar/python@2/2.7.15_1/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python")); - assert!(is_python_framework("/usr/local/Cellar/python@2/2.7.15_1/Frameworks/Python.framework/Versions/2.7/Python")); + assert!(is_python_framework( + "/usr/local/Cellar/python@2/2.7.15_1/Frameworks/Python.framework/Versions/2.7/Python" + )); // System python from osx 10.13.6 (high sierra) assert!(!is_python_framework("/System/Library/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python")); - assert!(is_python_framework("/System/Library/Frameworks/Python.framework/Versions/2.7/Python")); + assert!(is_python_framework( + "/System/Library/Frameworks/Python.framework/Versions/2.7/Python" + )); // pyenv 3.6.6 with OSX framework enabled (https://github.com/benfred/py-spy/issues/15) // env PYTHON_CONFIGURE_OPTS="--enable-framework" pyenv install 3.6.6 - assert!(is_python_framework("/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Python")); + assert!(is_python_framework( + "/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Python" + )); assert!(!is_python_framework("/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Resources/Python.app/Contents/MacOS/Python")); // single file pyinstaller - assert!(is_python_framework("/private/var/folders/3x/qy479lpd1fb2q88lc9g4d3kr0000gn/T/_MEI2Akvi8/Python")); + assert!(is_python_framework( + "/private/var/folders/3x/qy479lpd1fb2q88lc9g4d3kr0000gn/T/_MEI2Akvi8/Python" + )); } } diff --git a/src/python_spy.rs b/src/python_spy.rs index ceb248a1..51c8c471 100644 --- a/src/python_spy.rs +++ b/src/python_spy.rs @@ -1,26 +1,30 @@ +#[cfg(windows)] +use regex::RegexBuilder; use std; use std::collections::HashMap; -#[cfg(all(target_os="linux", unwind))] +#[cfg(all(target_os = "linux", unwind))] use 
std::collections::HashSet; -use std::path::Path; -#[cfg(all(target_os="linux", unwind))] +#[cfg(all(target_os = "linux", unwind))] use std::iter::FromIterator; -#[cfg(windows)] -use regex::RegexBuilder; +use std::path::Path; -use anyhow::{Error, Result, Context}; -use remoteprocess::{Process, ProcessMemory, Pid, Tid}; +use anyhow::{Context, Error, Result}; +use remoteprocess::{Pid, Process, ProcessMemory, Tid}; use crate::config::{Config, LockingStrategy}; #[cfg(unwind)] use crate::native_stack_trace::NativeStack; -use crate::python_bindings::{v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{ + v2_7_15, v3_10_0, v3_11_0, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5, +}; use crate::python_data_access::format_variable; use crate::python_interpreters::{InterpreterState, ThreadState}; +use crate::python_process_info::{ + get_interpreter_address, get_python_version, get_threadstate_address, PythonProcessInfo, +}; use crate::python_threading::thread_name_lookup; -use crate::stack_trace::{StackTrace, get_stack_trace, get_gil_threadid}; +use crate::stack_trace::{get_gil_threadid, get_stack_trace, StackTrace}; use crate::version::Version; -use crate::python_process_info::{PythonProcessInfo, get_python_version, get_interpreter_address, get_threadstate_address}; /// Lets you retrieve stack traces of a running python program pub struct PythonSpy { @@ -37,8 +41,8 @@ pub struct PythonSpy { pub short_filenames: HashMap>, pub python_thread_ids: HashMap, pub python_thread_names: HashMap, - #[cfg(target_os="linux")] - pub dockerized: bool + #[cfg(target_os = "linux")] + pub dockerized: bool, } impl PythonSpy { @@ -53,7 +57,7 @@ impl PythonSpy { // lock the process when loading up on freebsd (rather than locking // on every memory read). 
Needs done after getting python process info // because procmaps also tries to attach w/ ptrace on freebsd - #[cfg(target_os="freebsd")] + #[cfg(target_os = "freebsd")] let _lock = process.lock(); let version = get_python_version(&python_info, &process)?; @@ -61,7 +65,7 @@ impl PythonSpy { let interpreter_address = get_interpreter_address(&python_info, &process, &version)?; info!("Found interpreter at 0x{:016x}", interpreter_address); - + // lets us figure out which thread has the GIL let threadstate_address = get_threadstate_address(&python_info, &version, config)?; @@ -69,28 +73,38 @@ impl PythonSpy { #[cfg(unwind)] let native = if config.native { - Some(NativeStack::new(pid, python_info.python_binary, python_info.libpython_binary)?) + Some(NativeStack::new( + pid, + python_info.python_binary, + python_info.libpython_binary, + )?) } else { None }; - Ok(PythonSpy{pid, process, version, interpreter_address, threadstate_address, - python_filename: python_info.python_filename, - version_string, - #[cfg(unwind)] - native, - #[cfg(target_os="linux")] - dockerized: python_info.dockerized, - config: config.clone(), - short_filenames: HashMap::new(), - python_thread_ids: HashMap::new(), - python_thread_names: HashMap::new()}) + Ok(PythonSpy { + pid, + process, + version, + interpreter_address, + threadstate_address, + python_filename: python_info.python_filename, + version_string, + #[cfg(unwind)] + native, + #[cfg(target_os = "linux")] + dockerized: python_info.dockerized, + config: config.clone(), + short_filenames: HashMap::new(), + python_thread_ids: HashMap::new(), + python_thread_names: HashMap::new(), + }) } /// Creates a PythonSpy object, retrying up to max_retries times. /// Mainly useful for the case where the process is just started and /// symbols or the python interpreter might not be loaded yet. 
- pub fn retry_new(pid: Pid, config: &Config, max_retries:u64) -> Result { + pub fn retry_new(pid: Pid, config: &Config, max_retries: u64) -> Result { let mut retries = 0; loop { let err = match PythonSpy::new(pid, config) { @@ -98,10 +112,10 @@ impl PythonSpy { // verify that we can load a stack trace before returning success match process.get_stack_traces() { Ok(_) => return Ok(process), - Err(err) => err + Err(err) => err, } - }, - Err(err) => err + } + Err(err) => err, }; // If we failed, retry a couple times before returning the last error @@ -118,25 +132,57 @@ impl PythonSpy { pub fn get_stack_traces(&mut self) -> Result, Error> { match self.version { // ABI for 2.3/2.4/2.5/2.6/2.7 is compatible for our purpose - Version{major: 2, minor: 3..=7, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 3, ..} => self._get_stack_traces::(), + Version { + major: 2, + minor: 3..=7, + .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 3, .. + } => self._get_stack_traces::(), // ABI for 3.4 and 3.5 is the same for our purposes - Version{major: 3, minor: 4, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 5, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 6, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 7, ..} => self._get_stack_traces::(), + Version { + major: 3, minor: 4, .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 5, .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 6, .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 7, .. 
+ } => self._get_stack_traces::(), // v3.8.0a1 to v3.8.0a3 is compatible with 3.7 ABI, but later versions of 3.8.0 aren't - Version{major: 3, minor: 8, patch: 0, ..} => { - match self.version.release_flags.as_ref() { - "a1" | "a2" | "a3" => self._get_stack_traces::(), - _ => self._get_stack_traces::() - } - } - Version{major: 3, minor: 8, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 9, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 10, ..} => self._get_stack_traces::(), - Version{major: 3, minor: 11, ..} => self._get_stack_traces::(), - _ => Err(format_err!("Unsupported version of Python: {}", self.version)), + Version { + major: 3, + minor: 8, + patch: 0, + .. + } => match self.version.release_flags.as_ref() { + "a1" | "a2" | "a3" => self._get_stack_traces::(), + _ => self._get_stack_traces::(), + }, + Version { + major: 3, minor: 8, .. + } => self._get_stack_traces::(), + Version { + major: 3, minor: 9, .. + } => self._get_stack_traces::(), + Version { + major: 3, + minor: 10, + .. + } => self._get_stack_traces::(), + Version { + major: 3, + minor: 11, + .. 
+ } => self._get_stack_traces::(), + _ => Err(format_err!( + "Unsupported version of Python: {}", + self.version + )), } } @@ -162,18 +208,29 @@ impl PythonSpy { // TODO: hoist most of this code out to stack_trace.rs, and // then annotate the output of that with things like native stack traces etc // have moved in gil / locals etc - let gil_thread_id = get_gil_threadid::(self.threadstate_address, &self.process)?; + let gil_thread_id = + get_gil_threadid::(self.threadstate_address, &self.process)?; // Get the python interpreter, and loop over all the python threads - let interp: I = self.process.copy_struct(self.interpreter_address) - .context("Failed to copy PyInterpreterState from process")?; + let interp: I = self + .process + .copy_struct(self.interpreter_address) + .context("Failed to copy PyInterpreterState from process")?; let mut traces = Vec::new(); let mut threads = interp.head(); while !threads.is_null() { // Get the stack trace of the python thread - let thread = self.process.copy_pointer(threads).context("Failed to copy PyThreadState")?; - let mut trace = get_stack_trace(&thread, &self.process, self.config.dump_locals > 0, self.config.lineno)?; + let thread = self + .process + .copy_pointer(threads) + .context("Failed to copy PyThreadState")?; + let mut trace = get_stack_trace( + &thread, + &self.process, + self.config.dump_locals > 0, + self.config.lineno, + )?; // Try getting the native thread id let python_thread_id = thread.thread_id(); @@ -226,7 +283,9 @@ impl PythonSpy { { if self.config.native { if let Some(native) = self.native.as_mut() { - let thread_id = trace.os_thread_id.ok_or_else(|| format_err!("failed to get os threadid"))?; + let thread_id = trace + .os_thread_id + .ok_or_else(|| format_err!("failed to get os threadid"))?; let os_thread = remoteprocess::Thread::new(thread_id as Tid)?; trace.frames = native.merge_native_thread(&trace.frames, &os_thread)? 
} @@ -238,7 +297,12 @@ impl PythonSpy { if let Some(locals) = frame.locals.as_mut() { let max_length = (128 * self.config.dump_locals) as isize; for local in locals { - let repr = format_variable::(&self.process, &self.version, local.addr, max_length); + let repr = format_variable::( + &self.process, + &self.version, + local.addr, + max_length, + ); local.repr = Some(repr.unwrap_or("?".to_owned())); } } @@ -266,22 +330,31 @@ impl PythonSpy { false } else { let frame = &frames[0]; - (frame.name == "wait" && frame.filename.ends_with("threading.py")) || - (frame.name == "select" && frame.filename.ends_with("selectors.py")) || - (frame.name == "poll" && (frame.filename.ends_with("asyncore.py") || - frame.filename.contains("zmq") || - frame.filename.contains("gevent") || - frame.filename.contains("tornado"))) + (frame.name == "wait" && frame.filename.ends_with("threading.py")) + || (frame.name == "select" && frame.filename.ends_with("selectors.py")) + || (frame.name == "poll" + && (frame.filename.ends_with("asyncore.py") + || frame.filename.contains("zmq") + || frame.filename.contains("gevent") + || frame.filename.contains("tornado"))) } } #[cfg(windows)] - fn _get_os_thread_id(&mut self, python_thread_id: u64, _interp: &I) -> Result, Error> { + fn _get_os_thread_id( + &mut self, + python_thread_id: u64, + _interp: &I, + ) -> Result, Error> { Ok(Some(python_thread_id as Tid)) } - #[cfg(target_os="macos")] - fn _get_os_thread_id(&mut self, python_thread_id: u64, _interp: &I) -> Result, Error> { + #[cfg(target_os = "macos")] + fn _get_os_thread_id( + &mut self, + python_thread_id: u64, + _interp: &I, + ) -> Result, Error> { // If we've already know this threadid, we're good if let Some(thread_id) = self.python_thread_ids.get(&python_thread_id) { return Ok(Some(*thread_id)); @@ -300,13 +373,21 @@ impl PythonSpy { Ok(None) } - #[cfg(all(target_os="linux", not(unwind)))] - fn _get_os_thread_id(&mut self, _python_thread_id: u64, _interp: &I) -> Result, Error> { + 
#[cfg(all(target_os = "linux", not(unwind)))] + fn _get_os_thread_id( + &mut self, + _python_thread_id: u64, + _interp: &I, + ) -> Result, Error> { Ok(None) } - #[cfg(all(target_os="linux", unwind))] - fn _get_os_thread_id(&mut self, python_thread_id: u64, interp: &I) -> Result, Error> { + #[cfg(all(target_os = "linux", unwind))] + fn _get_os_thread_id( + &mut self, + python_thread_id: u64, + interp: &I, + ) -> Result, Error> { // in nonblocking mode, we can't get the threadid reliably (method here requires reading the RBX // register which requires a ptrace attach). fallback to heuristic thread activity here if self.config.blocking == LockingStrategy::NonBlocking { @@ -327,13 +408,17 @@ impl PythonSpy { let mut all_python_threads = HashSet::new(); let mut threads = interp.head(); while !threads.is_null() { - let thread = self.process.copy_pointer(threads).context("Failed to copy PyThreadState")?; + let thread = self + .process + .copy_pointer(threads) + .context("Failed to copy PyThreadState")?; let current = thread.thread_id(); all_python_threads.insert(current); threads = thread.next(); } - let processed_os_threads: HashSet = HashSet::from_iter(self.python_thread_ids.values().map(|x| *x)); + let processed_os_threads: HashSet = + HashSet::from_iter(self.python_thread_ids.values().map(|x| *x)); let unwinder = self.process.unwinder()?; @@ -349,8 +434,10 @@ impl PythonSpy { if pthread_id != 0 { self.python_thread_ids.insert(pthread_id, threadid); } - }, - Err(e) => { warn!("Failed to get get_pthread_id for {}: {}", threadid, e); } + } + Err(e) => { + warn!("Failed to get get_pthread_id for {}: {}", threadid, e); + } }; } @@ -380,9 +467,13 @@ impl PythonSpy { Ok(None) } - - #[cfg(all(target_os="linux", unwind))] - pub fn _get_pthread_id(&self, unwinder: &remoteprocess::Unwinder, thread: &remoteprocess::Thread, threadids: &HashSet) -> Result { + #[cfg(all(target_os = "linux", unwind))] + pub fn _get_pthread_id( + &self, + unwinder: &remoteprocess::Unwinder, + thread: 
&remoteprocess::Thread, + threadids: &HashSet, + ) -> Result { let mut pthread_id = 0; let mut cursor = unwinder.cursor(thread)?; @@ -400,8 +491,12 @@ impl PythonSpy { Ok(pthread_id) } - #[cfg(target_os="freebsd")] - fn _get_os_thread_id(&mut self, _python_thread_id: u64, _interp: &I) -> Result, Error> { + #[cfg(target_os = "freebsd")] + fn _get_os_thread_id( + &mut self, + _python_thread_id: u64, + _interp: &I, + ) -> Result, Error> { Ok(None) } @@ -409,8 +504,11 @@ impl PythonSpy { match self.python_thread_names.get(&python_thread_id) { Some(thread_name) => Some(thread_name.clone()), None => { - self.python_thread_names = thread_name_lookup(self).unwrap_or_else(|| HashMap::new()); - self.python_thread_names.get(&python_thread_id).map(|name| name.clone()) + self.python_thread_names = + thread_name_lookup(self).unwrap_or_else(|| HashMap::new()); + self.python_thread_names + .get(&python_thread_id) + .map(|name| name.clone()) } } } @@ -430,10 +528,10 @@ impl PythonSpy { } // on linux the process could be running in docker, access the filename through procfs - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let filename_storage; - #[cfg(target_os="linux")] + #[cfg(target_os = "linux")] let filename = if self.dockerized { filename_storage = format!("/proc/{}/root{}", self.pid, filename); if Path::new(&filename_storage).exists() { @@ -460,7 +558,8 @@ impl PythonSpy { .ok() .map(|p| p.to_string_lossy().to_string()); - self.short_filenames.insert(filename.to_owned(), shortened.clone()); + self.short_filenames + .insert(filename.to_owned(), shortened.clone()); shortened } } diff --git a/src/python_threading.rs b/src/python_threading.rs index 446f0434..ea5ed89b 100644 --- a/src/python_threading.rs +++ b/src/python_threading.rs @@ -2,10 +2,10 @@ use std::collections::HashMap; use anyhow::Error; -use crate::python_bindings::{v3_6_6, v3_7_0, v3_8_0, v3_9_5, v3_10_0, v3_11_0}; +use crate::python_bindings::{v3_10_0, v3_11_0, v3_6_6, v3_7_0, v3_8_0, v3_9_5}; +use 
crate::python_data_access::{copy_long, copy_string, DictIterator, PY_TPFLAGS_MANAGED_DICT}; use crate::python_interpreters::{InterpreterState, Object, TypeObject}; use crate::python_spy::PythonSpy; -use crate::python_data_access::{copy_string, copy_long, DictIterator, PY_TPFLAGS_MANAGED_DICT}; use crate::version::Version; @@ -14,7 +14,10 @@ use remoteprocess::ProcessMemory; /// Returns a hashmap of threadid: threadname, by inspecting the '_active' variable in the /// 'threading' module. pub fn thread_names_from_interpreter( - interp: &I, process: &P, version: &Version) -> Result, Error> { + interp: &I, + process: &P, + version: &Version, +) -> Result, Error> { let mut ret = HashMap::new(); for entry in DictIterator::from(process, &version, interp.modules() as usize)? { let (key, value) = entry?; @@ -28,7 +31,6 @@ pub fn thread_names_from_interpreter( let name = copy_string(key as *const I::StringObject, process)?; if name == "_active" { - for i in DictIterator::from(process, &version, value)? { let (key, value) = i?; let (threadid, _) = copy_long(process, key)?; @@ -37,10 +39,15 @@ pub fn thread_names_from_interpreter( let thread_type = process.copy_pointer(thread.ob_type())?; let dict_iter = if thread_type.flags() & PY_TPFLAGS_MANAGED_DICT != 0 { - DictIterator::from_managed_dict(process, &version, value, thread.ob_type() as usize)? + DictIterator::from_managed_dict( + process, + &version, + value, + thread.ob_type() as usize, + )? } else { let dict_offset = thread_type.dictoffset(); - let dict_addr =(value as isize + dict_offset) as usize; + let dict_addr = (value as isize + dict_offset) as usize; let thread_dict_addr: usize = process.copy_struct(dict_addr)?; DictIterator::from(process, &version, thread_dict_addr)? 
}; @@ -50,7 +57,8 @@ pub fn thread_names_from_interpreter( let varname = copy_string(key as *const I::StringObject, process)?; if varname == "_name" { - let threadname = copy_string(value as *const I::StringObject, process)?; + let threadname = + copy_string(value as *const I::StringObject, process)?; ret.insert(threadid as u64, threadname); break; } @@ -67,9 +75,15 @@ pub fn thread_names_from_interpreter( /// Returns a hashmap of threadid: threadname, by inspecting the '_active' variable in the /// 'threading' module. -fn _thread_name_lookup(spy: &PythonSpy) -> Result, Error> { +fn _thread_name_lookup( + spy: &PythonSpy, +) -> Result, Error> { let interp: I = spy.process.copy_struct(spy.interpreter_address)?; - Ok(thread_names_from_interpreter(&interp, &spy.process, &spy.version)?) + Ok(thread_names_from_interpreter( + &interp, + &spy.process, + &spy.version, + )?) } // try getting the threadnames, but don't sweat it if we can't. Since this relies on dictionary @@ -77,13 +91,29 @@ fn _thread_name_lookup(spy: &PythonSpy) -> Result Option> { let err = match process.version { - Version{major: 3, minor: 6, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 7, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 8, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 9, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 10, ..} => _thread_name_lookup::(&process), - Version{major: 3, minor: 11, ..} => _thread_name_lookup::(&process), - _ => return None + Version { + major: 3, minor: 6, .. + } => _thread_name_lookup::(&process), + Version { + major: 3, minor: 7, .. + } => _thread_name_lookup::(&process), + Version { + major: 3, minor: 8, .. + } => _thread_name_lookup::(&process), + Version { + major: 3, minor: 9, .. + } => _thread_name_lookup::(&process), + Version { + major: 3, + minor: 10, + .. + } => _thread_name_lookup::(&process), + Version { + major: 3, + minor: 11, + .. 
+ } => _thread_name_lookup::(&process), + _ => return None, }; err.ok() } diff --git a/src/sampler.rs b/src/sampler.rs index 2f60f511..d125ab0a 100644 --- a/src/sampler.rs +++ b/src/sampler.rs @@ -1,17 +1,17 @@ use std::collections::HashMap; -use std::sync::mpsc::{self, Sender, Receiver}; -use std::sync::{Mutex, Arc}; -use std::time::Duration; +use std::sync::mpsc::{self, Receiver, Sender}; +use std::sync::{Arc, Mutex}; use std::thread; +use std::time::Duration; use anyhow::Error; use remoteprocess::Pid; -use crate::timer::Timer; -use crate::python_spy::PythonSpy; use crate::config::Config; -use crate::stack_trace::{StackTrace, ProcessInfo}; +use crate::python_spy::PythonSpy; +use crate::stack_trace::{ProcessInfo, StackTrace}; +use crate::timer::Timer; use crate::version::Version; pub struct Sampler { @@ -23,7 +23,7 @@ pub struct Sampler { pub struct Sample { pub traces: Vec, pub sampling_errors: Option>, - pub late: Option + pub late: Option, } impl Sampler { @@ -38,7 +38,10 @@ impl Sampler { /// Creates a new sampler object, reading from a single process only fn new_sampler(pid: Pid, config: &Config) -> Result { let (tx, rx): (Sender, Receiver) = mpsc::channel(); - let (initialized_tx, initialized_rx): (Sender>, Receiver>) = mpsc::channel(); + let (initialized_tx, initialized_rx): ( + Sender>, + Receiver>, + ) = mpsc::channel(); let config = config.clone(); let sampling_thread = thread::spawn(move || { // We need to create this object inside the thread here since PythonSpy objects don't @@ -49,8 +52,8 @@ impl Sampler { return; } spy - }, - Err(e) => { + } + Err(e) => { if initialized_tx.send(Err(e)).is_err() {} return; } @@ -62,7 +65,10 @@ impl Sampler { Ok(traces) => traces, Err(e) => { if spy.process.exe().is_err() { - info!("stopped sampling pid {} because the process exited", spy.pid); + info!( + "stopped sampling pid {} because the process exited", + spy.pid + ); break; } sampling_errors = Some(vec![(spy.pid, e)]); @@ -71,14 +77,25 @@ impl Sampler { }; let 
late = sleep.err(); - if tx.send(Sample{traces: traces, sampling_errors, late}).is_err() { + if tx + .send(Sample { + traces: traces, + sampling_errors, + late, + }) + .is_err() + { break; } } }); let version = initialized_rx.recv()??; - Ok(Sampler{rx: Some(rx), version: Some(version), sampling_thread: Some(sampling_thread)}) + Ok(Sampler { + rx: Some(rx), + version: Some(version), + sampling_thread: Some(sampling_thread), + }) } /// Creates a new sampler object that samples any python process in the @@ -96,8 +113,12 @@ impl Sampler { // If we can't create the child process, don't worry about it // can happen with zombie child processes etc match PythonSpyThread::new(childpid, Some(parentpid), &config) { - Ok(spy) => { spies.insert(childpid, spy); }, - Err(e) => { warn!("Failed to open process {}: {}", childpid, e); } + Ok(spy) => { + spies.insert(childpid, spy); + } + Err(e) => { + warn!("Failed to open process {}: {}", childpid, e); + } } } @@ -110,7 +131,10 @@ impl Sampler { // Otherwise sleep for a short time and retry retries -= 1; if retries == 0 { - return Err(format_err!("No python processes found in process {} or any of its subprocesses", pid)); + return Err(format_err!( + "No python processes found in process {} or any of its subprocesses", + pid + )); } std::thread::sleep(std::time::Duration::from_millis(100)); } @@ -124,17 +148,26 @@ impl Sampler { while process.exe().is_ok() { match monitor_spies.lock() { Ok(mut spies) => { - for (childpid, parentpid) in process.child_processes().expect("failed to get subprocesses") { + for (childpid, parentpid) in process + .child_processes() + .expect("failed to get subprocesses") + { if spies.contains_key(&childpid) { continue; } match PythonSpyThread::new(childpid, Some(parentpid), &monitor_config) { - Ok(spy) => { spies.insert(childpid, spy); } - Err(e) => { warn!("Failed to create spy for {}: {}", childpid, e); } + Ok(spy) => { + spies.insert(childpid, spy); + } + Err(e) => { + warn!("Failed to create spy for {}: 
{}", childpid, e); + } } } - }, - Err(e) => { error!("Failed to acquire lock: {}", e); } + } + Err(e) => { + error!("Failed to acquire lock: {}", e); + } } std::thread::sleep(Duration::from_millis(100)); } @@ -168,11 +201,11 @@ impl Sampler { // collect the traces from each python spy if possible for spy in spies.values_mut() { match spy.collect() { - Some(Ok(mut t)) => { traces.append(&mut t) }, + Some(Ok(mut t)) => traces.append(&mut t), Some(Err(e)) => { let errors = sampling_errors.get_or_insert_with(|| Vec::new()); errors.push((spy.process.pid, e)); - }, + } None => {} } } @@ -181,15 +214,22 @@ impl Sampler { for trace in traces.iter_mut() { let pid = trace.pid; // Annotate each trace with the process info for the current - let process = process_info.entry(pid).or_insert_with(|| { - get_process_info(pid, &spies).map(|p| Arc::new(*p)) - }); + let process = process_info + .entry(pid) + .or_insert_with(|| get_process_info(pid, &spies).map(|p| Arc::new(*p))); trace.process_info = process.clone(); } // Send the collected info back let late = sleep.err(); - if tx.send(Sample{traces, sampling_errors, late}).is_err() { + if tx + .send(Sample { + traces, + sampling_errors, + late, + }) + .is_err() + { break; } @@ -200,7 +240,11 @@ impl Sampler { } }); - Ok(Sampler{rx: Some(rx), version: None, sampling_thread: Some(sampling_thread)}) + Ok(Sampler { + rx: Some(rx), + version: None, + sampling_thread: Some(sampling_thread), + }) } } @@ -229,17 +273,26 @@ struct PythonSpyThread { notified: bool, pub process: remoteprocess::Process, pub parent: Option, - pub command_line: String + pub command_line: String, } impl PythonSpyThread { fn new(pid: Pid, parent: Option, config: &Config) -> Result { - let (initialized_tx, initialized_rx): (Sender>, Receiver>) = mpsc::channel(); + let (initialized_tx, initialized_rx): ( + Sender>, + Receiver>, + ) = mpsc::channel(); let (notify_tx, notify_rx): (Sender<()>, Receiver<()>) = mpsc::channel(); - let (sample_tx, sample_rx): (Sender, 
Error>>, Receiver, Error>>) = mpsc::channel(); + let (sample_tx, sample_rx): ( + Sender, Error>>, + Receiver, Error>>, + ) = mpsc::channel(); let config = config.clone(); let process = remoteprocess::Process::new(pid)?; - let command_line = process.cmdline().map(|x| x.join(" ")).unwrap_or("".to_owned()); + let command_line = process + .cmdline() + .map(|x| x.join(" ")) + .unwrap_or("".to_owned()); thread::spawn(move || { // We need to create this object inside the thread here since PythonSpy objects don't @@ -250,8 +303,8 @@ impl PythonSpyThread { return; } spy - }, - Err(e) => { + } + Err(e) => { warn!("Failed to profile python from process {}: {}", pid, e); if initialized_tx.send(Err(e)).is_err() {} return; @@ -262,7 +315,10 @@ impl PythonSpyThread { let result = spy.get_stack_traces(); if let Err(_) = result { if spy.process.exe().is_err() { - info!("stopped sampling pid {} because the process exited", spy.pid); + info!( + "stopped sampling pid {} because the process exited", + spy.pid + ); break; } } @@ -271,19 +327,32 @@ impl PythonSpyThread { } } }); - Ok(PythonSpyThread{initialized_rx, notify_tx, sample_rx, process, command_line, parent, initialized: None, running: false, notified: false}) + Ok(PythonSpyThread { + initialized_rx, + notify_tx, + sample_rx, + process, + command_line, + parent, + initialized: None, + running: false, + notified: false, + }) } - fn wait_initialized(&mut self) -> bool { + fn wait_initialized(&mut self) -> bool { match self.initialized_rx.recv() { Ok(status) => { self.running = status.is_ok(); self.initialized = Some(status); self.running - }, + } Err(e) => { // shouldn't happen, but will be ok if it does - warn!("Failed to get initialization status from PythonSpyThread: {}", e); + warn!( + "Failed to get initialization status from PythonSpyThread: {}", + e + ); false } } @@ -298,7 +367,7 @@ impl PythonSpyThread { self.running = status.is_ok(); self.initialized = Some(status); self.running - }, + } 
Err(std::sync::mpsc::TryRecvError::Empty) => false, Err(std::sync::mpsc::TryRecvError::Disconnected) => { // this *shouldn't* happen @@ -310,12 +379,16 @@ impl PythonSpyThread { fn notify(&mut self) { match self.notify_tx.send(()) { - Ok(_) => { self.notified = true; }, - Err(_) => { self.running = false; } + Ok(_) => { + self.notified = true; + } + Err(_) => { + self.running = false; + } } } - fn collect(&mut self) -> Option, Error>> { + fn collect(&mut self) -> Option, Error>> { if !self.notified { return None; } @@ -332,7 +405,13 @@ impl PythonSpyThread { fn get_process_info(pid: Pid, spies: &HashMap) -> Option> { spies.get(&pid).map(|spy| { - let parent = spy.parent.and_then(|parentpid| get_process_info(parentpid, spies)); - Box::new(ProcessInfo{pid, parent, command_line: spy.command_line.clone()}) + let parent = spy + .parent + .and_then(|parentpid| get_process_info(parentpid, spies)); + Box::new(ProcessInfo { + pid, + parent, + command_line: spy.command_line.clone(), + }) }) } diff --git a/src/speedscope.rs b/src/speedscope.rs index 5ce378de..dcb99fa3 100644 --- a/src/speedscope.rs +++ b/src/speedscope.rs @@ -26,14 +26,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -use std::collections::{HashMap}; +use std::collections::HashMap; use std::io; use std::io::Write; use crate::stack_trace; -use remoteprocess::{Tid, Pid}; +use remoteprocess::{Pid, Tid}; -use anyhow::{Error}; +use anyhow::Error; use serde_derive::{Deserialize, Serialize}; use serde_json; @@ -128,40 +128,51 @@ enum ValueUnit { } impl SpeedscopeFile { - pub fn new(samples: &HashMap<(Pid, Tid), Vec>>, frames: &Vec, - thread_name_map: &HashMap<(Pid, Tid), String>, sample_rate: u64) -> SpeedscopeFile { - - let mut profiles: Vec = samples.iter().map(|(thread_id, samples)| { - let end_value = samples.len(); - // we sample at 100 Hz, so scale the end value and weights to match the time unit - let scaled_end_value = end_value as f64 / sample_rate as f64; - let weights: Vec = (&samples).iter().map(|_s| 1_f64 / sample_rate as f64).collect(); - - Profile { - profile_type: ProfileType::Sampled, - name: thread_name_map.get(thread_id).map_or_else(|| "py-spy".to_string(), |x| x.clone()), - unit: ValueUnit::Seconds, - start_value: 0.0, - end_value: scaled_end_value, - samples: samples.clone(), - weights + pub fn new( + samples: &HashMap<(Pid, Tid), Vec>>, + frames: &Vec, + thread_name_map: &HashMap<(Pid, Tid), String>, + sample_rate: u64, + ) -> SpeedscopeFile { + let mut profiles: Vec = samples + .iter() + .map(|(thread_id, samples)| { + let end_value = samples.len(); + // we sample at 100 Hz, so scale the end value and weights to match the time unit + let scaled_end_value = end_value as f64 / sample_rate as f64; + let weights: Vec = (&samples) + .iter() + .map(|_s| 1_f64 / sample_rate as f64) + .collect(); + + Profile { + profile_type: ProfileType::Sampled, + name: thread_name_map + .get(thread_id) + .map_or_else(|| "py-spy".to_string(), |x| x.clone()), + unit: ValueUnit::Seconds, + start_value: 0.0, + end_value: scaled_end_value, + samples: samples.clone(), + weights, + } + }) + .collect(); + + profiles.sort_by(|a, b| a.name.cmp(&b.name)); + + SpeedscopeFile { + // This is 
always the same + schema: "https://www.speedscope.app/file-format-schema.json".to_string(), + active_profile_index: None, + name: Some("py-spy profile".to_string()), + exporter: Some(format!("py-spy@{}", env!("CARGO_PKG_VERSION"))), + profiles: profiles, + shared: Shared { + frames: frames.clone(), + }, } - }).collect(); - - profiles.sort_by(|a, b| a.name.cmp(&b.name)); - - SpeedscopeFile { - // This is always the same - schema: "https://www.speedscope.app/file-format-schema.json".to_string(), - active_profile_index: None, - name: Some("py-spy profile".to_string()), - exporter: Some(format!("py-spy@{}", env!("CARGO_PKG_VERSION"))), - profiles: profiles, - shared: Shared { - frames: frames.clone() - } } - } } impl Frame { @@ -170,8 +181,12 @@ impl Frame { name: stack_frame.name.clone(), // TODO: filename? file: Some(stack_frame.filename.clone()), - line: if show_line_numbers { Some(stack_frame.line as u32) } else { None }, - col: None + line: if show_line_numbers { + Some(stack_frame.line as u32) + } else { + None + }, + col: None, } } } @@ -197,30 +212,43 @@ impl Stats { pub fn record(&mut self, stack: &stack_trace::StackTrace) -> Result<(), io::Error> { let show_line_numbers = self.config.show_line_numbers; - let mut frame_indices: Vec = stack.frames.iter().map(|frame| { - let frames = &mut self.frames; - let mut key = frame.clone(); - if !show_line_numbers { - key.line = 0; - } - *self.frame_to_index.entry(key).or_insert_with(|| { - let len = frames.len(); - frames.push(Frame::new(&frame, show_line_numbers)); - len + let mut frame_indices: Vec = stack + .frames + .iter() + .map(|frame| { + let frames = &mut self.frames; + let mut key = frame.clone(); + if !show_line_numbers { + key.line = 0; + } + *self.frame_to_index.entry(key).or_insert_with(|| { + let len = frames.len(); + frames.push(Frame::new(&frame, show_line_numbers)); + len + }) }) - }).collect(); + .collect(); frame_indices.reverse(); let key = (stack.pid as Pid, stack.thread_id as Tid); - 
self.samples.entry(key).or_insert_with(|| { - vec![] - }).push(frame_indices); + self.samples + .entry(key) + .or_insert_with(|| vec![]) + .push(frame_indices); let subprocesses = self.config.subprocesses; self.thread_name_map.entry(key).or_insert_with(|| { - let thread_name = stack.thread_name.as_ref().map_or_else(|| "".to_string(), |x| x.clone()); + let thread_name = stack + .thread_name + .as_ref() + .map_or_else(|| "".to_string(), |x| x.clone()); if subprocesses { - format!("Process {} Thread {} \"{}\"", stack.pid, stack.format_threadid(), thread_name) + format!( + "Process {} Thread {} \"{}\"", + stack.pid, + stack.format_threadid(), + thread_name + ) } else { format!("Thread {} \"{}\"", stack.format_threadid(), thread_name) } @@ -230,7 +258,12 @@ impl Stats { } pub fn write(&self, w: &mut dyn Write) -> Result<(), Error> { - let json = serde_json::to_string(&SpeedscopeFile::new(&self.samples, &self.frames, &self.thread_name_map, self.config.sampling_rate))?; + let json = serde_json::to_string(&SpeedscopeFile::new( + &self.samples, + &self.frames, + &self.thread_name_map, + self.config.sampling_rate, + ))?; writeln!(w, "{}", json)?; Ok(()) } @@ -244,7 +277,11 @@ mod tests { #[test] fn test_speedscope_units() { let sample_rate = 100; - let config = Config{show_line_numbers: true, sampling_rate: sample_rate, ..Default::default()}; + let config = Config { + show_line_numbers: true, + sampling_rate: sample_rate, + ..Default::default() + }; let mut stats = Stats::new(&config); let mut cursor = Cursor::new(Vec::new()); diff --git a/src/stack_trace.rs b/src/stack_trace.rs index b7d58163..213a6131 100644 --- a/src/stack_trace.rs +++ b/src/stack_trace.rs @@ -3,12 +3,14 @@ use std::sync::Arc; use anyhow::{Context, Error, Result}; -use remoteprocess::{ProcessMemory, Pid}; +use remoteprocess::{Pid, ProcessMemory}; use serde_derive::Serialize; -use crate::python_interpreters::{InterpreterState, ThreadState, FrameObject, CodeObject, TupleObject}; -use 
crate::python_data_access::{copy_string, copy_bytes}; use crate::config::{Config, LineNo}; +use crate::python_data_access::{copy_bytes, copy_string}; +use crate::python_interpreters::{ + CodeObject, FrameObject, InterpreterState, ThreadState, TupleObject, +}; /// Call stack for a single python thread #[derive(Debug, Clone, Serialize)] @@ -28,7 +30,7 @@ pub struct StackTrace { /// The frames pub frames: Vec, /// process commandline / parent process info - pub process_info: Option> + pub process_info: Option>, } /// Information about a single function call in a stack trace @@ -58,15 +60,22 @@ pub struct LocalVariable { #[derive(Debug, Clone, Serialize)] pub struct ProcessInfo { - pub pid: Pid, + pub pid: Pid, pub command_line: String, - pub parent: Option> + pub parent: Option>, } /// Given an InterpreterState, this function returns a vector of stack traces for each thread -pub fn get_stack_traces(interpreter: &I, process: &P, threadstate_address: usize, config: Option<&Config>) -> Result, Error> - where I: InterpreterState, P: ProcessMemory { - +pub fn get_stack_traces( + interpreter: &I, + process: &P, + threadstate_address: usize, + config: Option<&Config>, +) -> Result, Error> +where + I: InterpreterState, + P: ProcessMemory, +{ let gil_thread_id = get_gil_threadid::(threadstate_address, process)?; let mut ret = Vec::new(); @@ -76,7 +85,9 @@ pub fn get_stack_traces(interpreter: &I, process: &P, threadstate_address: let dump_locals = config.map(|c| c.dump_locals).unwrap_or(0); while !threads.is_null() { - let thread = process.copy_pointer(threads).context("Failed to copy PyThreadState")?; + let thread = process + .copy_pointer(threads) + .context("Failed to copy PyThreadState")?; let mut trace = get_stack_trace(&thread, process, dump_locals > 0, lineno)?; trace.owns_gil = trace.thread_id == gil_thread_id; @@ -92,8 +103,16 @@ pub fn get_stack_traces(interpreter: &I, process: &P, threadstate_address: } /// Gets a stack trace for an individual thread -pub fn 
get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: LineNo) -> Result - where T: ThreadState, P: ProcessMemory { +pub fn get_stack_trace( + thread: &T, + process: &P, + copy_locals: bool, + lineno: LineNo, +) -> Result +where + T: ThreadState, + P: ProcessMemory, +{ // TODO: just return frames here? everything else probably should be returned out of scope let mut frames = Vec::new(); @@ -105,8 +124,12 @@ pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: let mut frame_ptr = thread.frame(frame_address); while !frame_ptr.is_null() { - let frame = process.copy_pointer(frame_ptr).context("Failed to copy PyFrameObject")?; - let code = process.copy_pointer(frame.code()).context("Failed to copy PyCodeObject")?; + let frame = process + .copy_pointer(frame_ptr) + .context("Failed to copy PyFrameObject")?; + let code = process + .copy_pointer(frame.code()) + .context("Failed to copy PyCodeObject")?; let filename = copy_string(code.filename(), process).context("Failed to copy filename")?; let name = copy_string(code.name(), process).context("Failed to copy function name")?; @@ -121,10 +144,13 @@ pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: // can happen in extreme cases (https://github.com/benfred/py-spy/issues/164) // Rather than fail set the linenumber to 0. 
This is used by the native extensions // to indicate that we can't load a line number and it should be handled gracefully - warn!("Failed to get line number from {}.{}: {}", filename, name, e); + warn!( + "Failed to get line number from {}.{}: {}", + filename, name, e + ); 0 } - } + }, }; let locals = if copy_locals { @@ -133,7 +159,14 @@ pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: None }; - frames.push(Frame{name, filename, line, short_filename: None, module: None, locals}); + frames.push(Frame { + name, + filename, + line, + short_filename: None, + module: None, + locals, + }); if frames.len() > 4096 { return Err(format_err!("Max frame recursion depth reached")); } @@ -141,7 +174,16 @@ pub fn get_stack_trace(thread: &T, process: &P, copy_locals: bool, lineno: frame_ptr = frame.back(); } - Ok(StackTrace{pid: 0, frames, thread_id: thread.thread_id(), thread_name: None, owns_gil: false, active: true, os_thread_id: thread.native_thread_id(), process_info: None}) + Ok(StackTrace { + pid: 0, + frames, + thread_id: thread.thread_id(), + thread_name: None, + owns_gil: false, + active: true, + os_thread_id: thread.native_thread_id(), + process_info: None, + }) } impl StackTrace { @@ -155,27 +197,35 @@ impl StackTrace { pub fn format_threadid(&self) -> String { // native threadids in osx are kinda useless, use the pthread id instead - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] return format!("{:#X}", self.thread_id); // otherwise use the native threadid if given - #[cfg(not(target_os="macos"))] + #[cfg(not(target_os = "macos"))] match self.os_thread_id { Some(tid) => format!("{}", tid), - None => format!("{:#X}", self.thread_id) + None => format!("{:#X}", self.thread_id), } } } /// Returns the line number from a PyCodeObject (given the lasti index from a PyFrameObject) -fn get_line_number(code: &C, lasti: i32, process: &P) -> Result { - let table = copy_bytes(code.line_table(), process).context("Failed to copy line number 
table")?; +fn get_line_number( + code: &C, + lasti: i32, + process: &P, +) -> Result { + let table = + copy_bytes(code.line_table(), process).context("Failed to copy line number table")?; Ok(code.get_line_number(lasti, &table)) } - -fn get_locals(code: &C, frameptr: *const F, frame: &F, process: &P) - -> Result, Error> { +fn get_locals( + code: &C, + frameptr: *const F, + frame: &F, + process: &P, +) -> Result, Error> { let local_count = code.nlocals() as usize; let argcount = code.argcount() as usize; let varnames = process.copy_pointer(code.varnames())?; @@ -186,19 +236,27 @@ fn get_locals(code: &C, framept let mut ret = Vec::new(); for i in 0..local_count { - let nameptr: *const C::StringObject = process.copy_struct(varnames.address(code.varnames() as usize, i))?; + let nameptr: *const C::StringObject = + process.copy_struct(varnames.address(code.varnames() as usize, i))?; let name = copy_string(nameptr, process)?; let addr: usize = process.copy_struct(locals_addr + i * ptr_size)?; if addr == 0 { continue; } - ret.push(LocalVariable{name, addr, arg: i < argcount, repr: None}); + ret.push(LocalVariable { + name, + addr, + arg: i < argcount, + repr: None, + }); } Ok(ret) } -pub fn get_gil_threadid(threadstate_address: usize, process: &P) - -> Result { +pub fn get_gil_threadid( + threadstate_address: usize, + process: &P, +) -> Result { // figure out what thread has the GIL by inspecting _PyThreadState_Current if threadstate_address > 0 { let addr: usize = process.copy_struct(threadstate_address)?; @@ -214,25 +272,32 @@ pub fn get_gil_threadid(threadstate_addre impl ProcessInfo { pub fn to_frame(&self) -> Frame { - Frame{name: format!("process {}:\"{}\"", self.pid, self.command_line), + Frame { + name: format!("process {}:\"{}\"", self.pid, self.command_line), filename: String::from(""), - module: None, short_filename: None, line: 0, locals: None} + module: None, + short_filename: None, + line: 0, + locals: None, + } } } #[cfg(test)] mod tests { use super::*; - use 
remoteprocess::LocalProcess; - use crate::python_bindings::v3_7_0::{PyCodeObject}; + use crate::python_bindings::v3_7_0::PyCodeObject; use crate::python_data_access::tests::to_byteobject; + use remoteprocess::LocalProcess; #[test] fn test_get_line_number() { let mut lnotab = to_byteobject(&[0u8, 1, 10, 1, 8, 1, 4, 1]); - let code = PyCodeObject{co_firstlineno: 3, - co_lnotab: &mut lnotab.base.ob_base.ob_base, - ..Default::default()}; + let code = PyCodeObject { + co_firstlineno: 3, + co_lnotab: &mut lnotab.base.ob_base.ob_base, + ..Default::default() + }; let lineno = get_line_number(&code, 30, &LocalProcess).unwrap(); assert_eq!(lineno, 7); } diff --git a/src/timer.rs b/src/timer.rs index af7ecbf9..f88486d3 100644 --- a/src/timer.rs +++ b/src/timer.rs @@ -1,9 +1,9 @@ -use std::time::{Instant, Duration}; +use std::time::{Duration, Instant}; #[cfg(windows)] use winapi::um::timeapi; use rand; -use rand_distr::{Exp, Distribution}; +use rand_distr::{Distribution, Exp}; /// Timer is an iterator that sleeps an appropriate amount of time between iterations /// so that we can sample the process a certain number of times a second. 
@@ -25,10 +25,16 @@ impl Timer { // https://randomascii.wordpress.com/2013/07/08/windows-timer-resolution-megawatts-wasted/ // and http://www.belshe.com/2010/06/04/chrome-cranking-up-the-clock/ #[cfg(windows)] - unsafe { timeapi::timeBeginPeriod(1); } + unsafe { + timeapi::timeBeginPeriod(1); + } let start = Instant::now(); - Timer{start, desired: Duration::from_secs(0), exp: Exp::new(rate).unwrap()} + Timer { + start, + desired: Duration::from_secs(0), + exp: Exp::new(rate).unwrap(), + } } } @@ -60,6 +66,8 @@ impl Iterator for Timer { impl Drop for Timer { fn drop(&mut self) { #[cfg(windows)] - unsafe { timeapi::timeEndPeriod(1); } + unsafe { + timeapi::timeEndPeriod(1); + } } } diff --git a/src/utils.rs b/src/utils.rs index f9674fa7..8627e7d9 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -11,9 +11,9 @@ pub fn resolve_filename(filename: &str, modulename: &str) -> Option { let module = Path::new(modulename); if let Some(parent) = module.parent() { if let Some(name) = path.file_name() { - let temp = parent.join(name); + let temp = parent.join(name); if temp.exists() { - return Some(temp.to_string_lossy().to_owned().to_string()) + return Some(temp.to_string_lossy().to_owned().to_string()); } } } diff --git a/src/version.rs b/src/version.rs index 8b079d26..c4dbda0e 100644 --- a/src/version.rs +++ b/src/version.rs @@ -2,27 +2,29 @@ use lazy_static::lazy_static; use regex::bytes::Regex; use std; -use anyhow::{Error}; - +use anyhow::Error; #[derive(Debug, PartialEq, Eq, Clone)] pub struct Version { pub major: u64, pub minor: u64, pub patch: u64, - pub release_flags: String + pub release_flags: String, } impl Version { pub fn scan_bytes(data: &[u8]) -> Result { lazy_static! { - static ref RE: Regex = Regex::new(r"((2|3)\.(3|4|5|6|7|8|9|10|11)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+? (.{1,64})").unwrap(); + static ref RE: Regex = Regex::new( + r"((2|3)\.(3|4|5|6|7|8|9|10|11)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+? 
(.{1,64})" + ) + .unwrap(); } if let Some(cap) = RE.captures_iter(data).next() { let release = match cap.get(5) { - Some(x) => { std::str::from_utf8(x.as_bytes())? }, - None => "" + Some(x) => std::str::from_utf8(x.as_bytes())?, + None => "", }; let major = std::str::from_utf8(&cap[2])?.parse::()?; let minor = std::str::from_utf8(&cap[3])?.parse::()?; @@ -41,7 +43,12 @@ impl Version { } } - return Ok(Version{major, minor, patch, release_flags:release.to_owned()}); + return Ok(Version { + major, + minor, + patch, + release_flags: release.to_owned(), + }); } Err(format_err!("failed to find version string")) } @@ -49,7 +56,11 @@ impl Version { impl std::fmt::Display for Version { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}.{}.{}{}", self.major, self.minor, self.patch, self.release_flags) + write!( + f, + "{}.{}.{}{}", + self.major, self.minor, self.patch, self.release_flags + ) } } @@ -59,18 +70,58 @@ mod tests { #[test] fn test_find_version() { let version = Version::scan_bytes(b"2.7.10 (default, Oct 6 2017, 22:29:07)").unwrap(); - assert_eq!(version, Version{major: 2, minor: 7, patch: 10, release_flags: "".to_owned()}); - - let version = Version::scan_bytes(b"3.6.3 |Anaconda custom (64-bit)| (default, Oct 6 2017, 12:04:38)").unwrap(); - assert_eq!(version, Version{major: 3, minor: 6, patch: 3, release_flags: "".to_owned()}); - - let version = Version::scan_bytes(b"Python 3.7.0rc1 (v3.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)").unwrap(); - assert_eq!(version, Version{major: 3, minor: 7, patch: 0, release_flags: "rc1".to_owned()}); - - let version = Version::scan_bytes(b"Python 3.10.0rc1 (tags/v3.10.0rc1, Aug 28 2021, 18:25:40)").unwrap(); - assert_eq!(version, Version{major: 3, minor: 10, patch: 0, release_flags: "rc1".to_owned()}); + assert_eq!( + version, + Version { + major: 2, + minor: 7, + patch: 10, + release_flags: "".to_owned() + } + ); + + let version = Version::scan_bytes( + b"3.6.3 |Anaconda custom (64-bit)| (default, 
Oct 6 2017, 12:04:38)", + ) + .unwrap(); + assert_eq!( + version, + Version { + major: 3, + minor: 6, + patch: 3, + release_flags: "".to_owned() + } + ); + + let version = + Version::scan_bytes(b"Python 3.7.0rc1 (v3.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)") + .unwrap(); + assert_eq!( + version, + Version { + major: 3, + minor: 7, + patch: 0, + release_flags: "rc1".to_owned() + } + ); + + let version = + Version::scan_bytes(b"Python 3.10.0rc1 (tags/v3.10.0rc1, Aug 28 2021, 18:25:40)") + .unwrap(); + assert_eq!( + version, + Version { + major: 3, + minor: 10, + patch: 0, + release_flags: "rc1".to_owned() + } + ); - let version = Version::scan_bytes(b"1.7.0rc1 (v1.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)"); + let version = + Version::scan_bytes(b"1.7.0rc1 (v1.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)"); assert!(version.is_err(), "don't match unsupported "); let version = Version::scan_bytes(b"3.7 10 "); @@ -81,6 +132,14 @@ mod tests { // v2.7.15+ is a valid version string apparently: https://github.com/benfred/py-spy/issues/81 let version = Version::scan_bytes(b"2.7.15+ (default, Oct 2 2018, 22:12:08)").unwrap(); - assert_eq!(version, Version{major: 2, minor: 7, patch: 15, release_flags: "".to_owned()}); + assert_eq!( + version, + Version { + major: 2, + minor: 7, + patch: 15, + release_flags: "".to_owned() + } + ); } } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 233d0b66..e5db6bbb 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,6 +1,6 @@ extern crate py_spy; +use py_spy::{Config, Pid, PythonSpy}; use std::collections::HashSet; -use py_spy::{Config, PythonSpy, Pid}; struct ScriptRunner { #[allow(dead_code)] @@ -9,11 +9,16 @@ struct ScriptRunner { impl ScriptRunner { fn new(process_name: &str, filename: &str) -> ScriptRunner { - let child = std::process::Command::new(process_name).arg(filename).spawn().unwrap(); - ScriptRunner{child} + let child = std::process::Command::new(process_name) + .arg(filename) + 
.spawn() + .unwrap(); + ScriptRunner { child } } - fn id(&self) -> Pid { self.child.id() as _ } + fn id(&self) -> Pid { + self.child.id() as _ + } } impl Drop for ScriptRunner { @@ -27,7 +32,7 @@ impl Drop for ScriptRunner { struct TestRunner { #[allow(dead_code)] child: ScriptRunner, - spy: PythonSpy + spy: PythonSpy, } impl TestRunner { @@ -35,13 +40,13 @@ impl TestRunner { let child = ScriptRunner::new("python", filename); std::thread::sleep(std::time::Duration::from_millis(400)); let spy = PythonSpy::retry_new(child.id(), &config, 20).unwrap(); - TestRunner{child, spy} + TestRunner { child, spy } } } #[test] fn test_busy_loop() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -65,7 +70,10 @@ fn test_thread_reuse() { // and this caused errors on native unwind (since the native thread had // exited). Test that this works with a simple script that creates // a couple short lived threads, and then profiling with native enabled - let config = Config{native: true, ..Default::default()}; + let config = Config { + native: true, + ..Default::default() + }; let mut runner = TestRunner::new(config, "./tests/scripts/thread_reuse.py"); let mut errors = 0; @@ -86,7 +94,7 @@ fn test_thread_reuse() { #[test] fn test_long_sleep() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -102,18 +110,24 @@ fn test_long_sleep() { // Make sure the stack trace is what we expect assert_eq!(trace.frames[0].name, "longsleep"); - assert_eq!(trace.frames[0].short_filename, Some("longsleep.py".to_owned())); + assert_eq!( + trace.frames[0].short_filename, + Some("longsleep.py".to_owned()) + ); assert_eq!(trace.frames[0].line, 5); assert_eq!(trace.frames[1].name, ""); assert_eq!(trace.frames[1].line, 9); - assert_eq!(trace.frames[1].short_filename, Some("longsleep.py".to_owned())); + 
assert_eq!( + trace.frames[1].short_filename, + Some("longsleep.py".to_owned()) + ); assert!(!traces[0].owns_gil); // we should reliably be able to detect the thread is sleeping on osx/windows // linux+freebsd is trickier - #[cfg(any(target_os="macos", target_os="windows"))] + #[cfg(any(target_os = "macos", target_os = "windows"))] assert!(!traces[0].active); } @@ -154,7 +168,7 @@ fn test_thread_names() { #[test] fn test_recursive() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -176,7 +190,7 @@ fn test_recursive() { assert!(trace.frames.len() <= 22); - let top_level_frame = &trace.frames[trace.frames.len()-1]; + let top_level_frame = &trace.frames[trace.frames.len() - 1]; assert_eq!(top_level_frame.name, ""); assert!((top_level_frame.line == 8) || (top_level_frame.line == 7)); @@ -186,7 +200,7 @@ fn test_recursive() { #[test] fn test_unicode() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { if unsafe { libc::geteuid() } != 0 { return; @@ -199,19 +213,25 @@ fn test_unicode() { let trace = &traces[0]; assert_eq!(trace.frames[0].name, "function1"); - assert_eq!(trace.frames[0].short_filename, Some("unicode💩.py".to_owned())); + assert_eq!( + trace.frames[0].short_filename, + Some("unicode💩.py".to_owned()) + ); assert_eq!(trace.frames[0].line, 6); assert_eq!(trace.frames[1].name, ""); assert_eq!(trace.frames[1].line, 9); - assert_eq!(trace.frames[1].short_filename, Some("unicode💩.py".to_owned())); + assert_eq!( + trace.frames[1].short_filename, + Some("unicode💩.py".to_owned()) + ); assert!(!traces[0].owns_gil); } #[test] fn test_local_vars() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -219,7 +239,10 @@ fn test_local_vars() { } } - let config = Config{dump_locals: 1, ..Default::default()}; + let config = Config { + dump_locals: 1, + 
..Default::default() + }; let mut runner = TestRunner::new(config, "./tests/scripts/local_vars.py"); let traces = runner.spy.get_stack_traces().unwrap(); @@ -277,14 +300,17 @@ fn test_local_vars() { // we only support dictionary lookup on python 3.6+ right now if runner.spy.version.major == 3 && runner.spy.version.minor >= 6 { - assert_eq!(local5.repr, Some("{\"a\": False, \"b\": (1, 2, 3)}".to_owned())); + assert_eq!( + local5.repr, + Some("{\"a\": False, \"b\": (1, 2, 3)}".to_owned()) + ); } } -#[cfg(not(target_os="freebsd"))] +#[cfg(not(target_os = "freebsd"))] #[test] fn test_subprocesses() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -296,7 +322,10 @@ fn test_subprocesses() { // was in a zombie state. Verify that this works now let process = ScriptRunner::new("python", "./tests/scripts/subprocesses.py"); std::thread::sleep(std::time::Duration::from_millis(1000)); - let config = Config{subprocesses: true, ..Default::default()}; + let config = Config { + subprocesses: true, + ..Default::default() + }; let sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); std::thread::sleep(std::time::Duration::from_millis(1000)); @@ -318,10 +347,10 @@ fn test_subprocesses() { } } -#[cfg(not(target_os="freebsd"))] +#[cfg(not(target_os = "freebsd"))] #[test] fn test_subprocesses_zombiechild() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { @@ -333,20 +362,26 @@ fn test_subprocesses_zombiechild() { // was in a zombie state. 
Verify that this works now let process = ScriptRunner::new("python", "./tests/scripts/subprocesses_zombie_child.py"); std::thread::sleep(std::time::Duration::from_millis(200)); - let config = Config{subprocesses: true, ..Default::default()}; + let config = Config { + subprocesses: true, + ..Default::default() + }; let _sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); } #[test] fn test_negative_linenumber_increment() { - #[cfg(target_os="macos")] + #[cfg(target_os = "macos")] { // We need root permissions here to run this on OSX if unsafe { libc::geteuid() } != 0 { return; } } - let mut runner = TestRunner::new(Config::default(), "./tests/scripts/negative_linenumber_offsets.py"); + let mut runner = TestRunner::new( + Config::default(), + "./tests/scripts/negative_linenumber_offsets.py", + ); let traces = runner.spy.get_stack_traces().unwrap(); assert_eq!(traces.len(), 1); @@ -360,22 +395,25 @@ fn test_negative_linenumber_increment() { assert!(trace.frames[1].line >= 5 && trace.frames[0].line <= 10); assert_eq!(trace.frames[2].name, ""); assert_eq!(trace.frames[2].line, 13) - }, + } 2 => { assert_eq!(trace.frames[0].name, "f"); assert!(trace.frames[0].line >= 5 && trace.frames[0].line <= 10); assert_eq!(trace.frames[1].name, ""); assert_eq!(trace.frames[1].line, 13); - }, - _ => panic!("Unknown python major version") + } + _ => panic!("Unknown python major version"), } } -#[cfg(target_os="linux")] +#[cfg(target_os = "linux")] #[test] fn test_delayed_subprocess() { let process = ScriptRunner::new("bash", "./tests/scripts/delayed_launch.sh"); - let config = Config{subprocesses: true, ..Default::default()}; + let config = Config { + subprocesses: true, + ..Default::default() + }; let sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); for sample in sampler { // should have one trace from the subprocess From ea18ba3cfd0f81a81bcabe95ac68fbfd38c0858c Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Tue, 21 Feb 2023 12:37:17 
+0100 Subject: [PATCH 2/9] Implement pyroscope reporting --- Cargo.lock | 760 +++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 2 + src/config.rs | 119 +++++++- src/main.rs | 182 ++++++++++++ 4 files changed, 1056 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b91a9189..dd866bda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,6 +83,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" + [[package]] name = "bindgen" version = "0.59.2" @@ -112,12 +118,24 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bumpalo" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + [[package]] name = "bytemuck" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5377c8865e74a160d21f29c2d40669f53286db6eab59b88540cbb12ffc8b835" +[[package]] +name = "bytes" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" + [[package]] name = "cc" version = "1.0.73" @@ -241,6 +259,22 @@ dependencies = [ "winapi", ] +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + [[package]] name = "cpp_demangle" version = "0.3.5" @@ -313,6 +347,15 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "encoding_rs" +version = "0.8.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" +dependencies = [ + "cfg-if", +] + [[package]] name = "env_logger" version = "0.9.0" @@ -372,6 +415,84 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" + +[[package]] +name = "futures-io" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" + +[[package]] +name = "futures-sink" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" + +[[package]] +name = "futures-task" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" + +[[package]] +name = "futures-util" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +dependencies = [ + "futures-core", + "futures-io", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "getrandom" version = "0.2.7" @@ -410,6 +531,25 @@ dependencies = [ "scroll", ] +[[package]] +name = "h2" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -434,12 +574,93 @@ dependencies = [ "libc", ] +[[package]] +name = "http" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +dependencies = [ + "bytes", + "fnv", + "itoa 1.0.2", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "hyper" +version = "0.14.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e011372fa0b68db8350aa7a248930ecc7839bf46d8485577d69f117a75f164c" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa 1.0.2", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "idna" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "indexmap" version = "1.9.1" @@ -495,6 +716,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "ipnet" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" + [[package]] name = "itoa" version = "0.4.8" @@ -507,6 +734,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" +[[package]] +name = "js-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -630,6 +866,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "mime" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -645,6 +887,36 @@ dependencies = [ "adler", ] +[[package]] +name = "mio" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" +dependencies = [ + "libc", + "log", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.42.0", +] + +[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "nix" version = "0.24.2" @@ -734,12 +1006,67 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +[[package]] +name = "openssl" +version = "0.10.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b102428fd03bc5edf97f62620f7298614c45cedf287c271e7ed450bbaf83f2e1" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23bbbf7854cd45b83958ebe919f0e8e516793727652e27fda10a8384cfc790b7" +dependencies = [ + "autocfg", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "os_str_bytes" version = "6.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4" +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + [[package]] name = "parking_lot_core" version = "0.9.3" @@ -750,7 +1077,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -759,6 +1086,30 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +[[package]] +name = "percent-encoding" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" + +[[package]] +name = 
"pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" + [[package]] name = "plain" version = "0.2.3" @@ -844,11 +1195,13 @@ dependencies = [ "rand_distr", "regex", "remoteprocess", + "reqwest", "serde", "serde_derive", "serde_json", "tempfile", "termios", + "tokio", "winapi", ] @@ -986,6 +1339,43 @@ dependencies = [ "winapi", ] +[[package]] +name = "reqwest" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + [[package]] name = "rgb" version = "0.8.33" @@ -1013,6 +1403,15 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +[[package]] +name = "schannel" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" +dependencies = [ + "windows-sys 0.42.0", +] + [[package]] name 
= "scopeguard" version = "1.1.0" @@ -1039,6 +1438,29 @@ dependencies = [ "syn", ] +[[package]] +name = "security-framework" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c4437699b6d34972de58652c68b98cb5b53a4199ab126db8e20ec8ded29a721" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "serde" version = "1.0.140" @@ -1067,18 +1489,58 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa 1.0.2", + "ryu", + "serde", +] + [[package]] name = "shlex" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +dependencies = [ + "autocfg", +] + [[package]] name = "smallvec" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" +[[package]] +name = "socket2" +version = "0.4.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -1185,18 +1647,152 @@ dependencies = [ "winapi", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" +dependencies = [ + "autocfg", + "bytes", + "libc", + "memchr", + "mio", + "num_cpus", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.42.0", +] + +[[package]] +name = "tokio-macros" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = 
"tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" + +[[package]] +name = "unicode-bidi" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" + [[package]] name = "unicode-ident" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-width" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +[[package]] +name = "url" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + 
+[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "vec_map" version = "0.8.2" @@ -1209,6 +1805,16 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -1221,6 +1827,82 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" + +[[package]] +name = "web-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "which" version = "4.2.5" @@ -1269,39 +1951,105 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.1", + "windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" + 
[[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" + +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] diff --git a/Cargo.toml b/Cargo.toml index 0676a2e6..0aeb0a8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,8 @@ rand = "0.8" rand_distr = "0.4" remoteprocess = {version="0.4.10", features=["unwind"]} chrono = "0.4.19" +reqwest = { version = "0.11", features = ["blocking"] } +tokio = { version = "1", features = ["full"] } [dev-dependencies] py-spy-testdata = "0.1.0" diff --git a/src/config.rs b/src/config.rs index e8a78d9a..98c7790e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -59,6 +59,15 @@ pub struct Config { pub refresh_seconds: f64, #[doc(hidden)] pub core_filename: Option, + + #[doc(hidden)] + pub pyroscope_url: Option, + #[doc(hidden)] + pub pyroscope_app: Option, + #[doc(hidden)] + pub pyroscope_tags: Option, + #[doc(hidden)] + pub pyroscope_report_interval: u64, } #[allow(non_camel_case_types)] @@ -138,6 +147,10 @@ impl Default for Config { lineno: LineNo::LastInstruction, refresh_seconds: 1.0, core_filename: None, + pyroscope_app: None, + pyroscope_tags: None, + pyroscope_url: None, + pyroscope_report_interval: 1000, } } } @@ -274,6 +287,78 @@ impl Config { .help("Hides progress bar (useful for showing error output on record)"), ); + let pyroscope = Command::new("pyroscope") + .about("Sends stack trace information to pyroscope") + .arg(program.clone()) + 
.arg(pid.clone().required_unless_present("python_program")) + .arg(full_filenames.clone()) + .arg( + Arg::new("url") + .short('y') + .long("pyroscope_url") + .value_name("http://localhost:4040") + .help("Pyroscope URL") + .takes_value(true) + .required(true), + ) + .arg( + Arg::new("tags") + .long("pyroscope_tags") + .value_name("env=staging,region=us-west-1") + .help("Pyroscope tags") + .takes_value(true) + .required(false), + ) + .arg( + Arg::new("app") + .long("pyroscope_app") + .value_name("app") + .help("Pyroscope app name") + .takes_value(true) + .required(true), + ) + .arg( + Arg::new("report_interval") + .long("pyroscope_report_interval") + .value_name("report_interval") + .help("Pyroscope report interval") + .takes_value(true) + .required(true), + ) + .arg( + Arg::new("duration") + .short('d') + .long("duration") + .value_name("duration") + .help("The number of seconds to sample for") + .default_value("unlimited") + .takes_value(true), + ) + .arg(rate.clone()) + .arg(subprocesses.clone()) + .arg(Arg::new("function").short('F').long("function").help( + "Aggregate samples by function's first line number, instead of current line number", + )) + .arg( + Arg::new("nolineno") + .long("nolineno") + .help("Do not show line numbers"), + ) + .arg( + Arg::new("threads") + .short('t') + .long("threads") + .help("Show thread ids in the output"), + ) + .arg(gil.clone()) + .arg(idle.clone()) + .arg( + Arg::new("capture") + .long("capture") + .hide(true) + .help("Captures output from child process"), + ); + let top = Command::new("top") .about("Displays a top like view of functions consuming CPU") .arg(program.clone()) @@ -333,6 +418,8 @@ impl Config { let top = top.arg(native.clone()); #[cfg(unwind)] let dump = dump.arg(native.clone()); + #[cfg(unwind)] + let pyroscope = pyroscope.arg(native.clone()); // Nonblocking isn't an option for freebsd, remove #[cfg(not(target_os = "freebsd"))] @@ -341,6 +428,8 @@ impl Config { let top = top.arg(nonblocking.clone()); 
#[cfg(not(target_os = "freebsd"))] let dump = dump.arg(nonblocking.clone()); + #[cfg(not(target_os = "freebsd"))] + let pyroscope = pyroscope.arg(nonblocking.clone()); let mut app = Command::new(crate_name!()) .version(crate_version!()) @@ -352,6 +441,7 @@ impl Config { .subcommand(record) .subcommand(top) .subcommand(dump) + .subcommand(pyroscope) .subcommand(completions); let matches = app.clone().try_get_matches_from(args)?; info!("Command line args: {:?}", matches); @@ -387,6 +477,33 @@ impl Config { } config.hide_progress = matches.occurrences_of("hideprogress") > 0; } + "pyroscope" => { + config.sampling_rate = matches.value_of_t("rate")?; + config.duration = match matches.value_of("duration") { + Some("unlimited") | None => RecordDuration::Unlimited, + Some(seconds) => { + RecordDuration::Seconds(seconds.parse().expect("invalid duration")) + } + }; + config.pyroscope_report_interval = matches.value_of_t("report_interval")?; + config.show_line_numbers = matches.occurrences_of("nolineno") == 0; + config.lineno = if matches.occurrences_of("nolineno") > 0 { + LineNo::NoLine + } else if matches.occurrences_of("function") > 0 { + LineNo::FirstLineNo + } else { + LineNo::LastInstruction + }; + config.include_thread_ids = matches.occurrences_of("threads") > 0; + config.pyroscope_url = matches.value_of("url").map(|f| f.to_owned()); + config.pyroscope_app = matches.value_of("app").map(|f| f.to_owned()); + config.pyroscope_tags = matches.value_of("tags").map(|f| f.to_owned()); + if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 + { + eprintln!("--function & --nolinenos can't be used together"); + std::process::exit(1); + } + } "top" => { config.sampling_rate = matches.value_of_t("rate")?; config.refresh_seconds = *matches.get_one::("delay").unwrap(); @@ -410,7 +527,7 @@ impl Config { } match subcommand { - "record" | "top" => { + "record" | "top" | "pyroscope" => { config.python_program = matches .values_of("python_program") 
.map(|vals| vals.map(|v| v.to_owned()).collect()); diff --git a/src/main.rs b/src/main.rs index 10c3a78a..e053f03b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -37,6 +37,7 @@ use console::style; use config::{Config, FileFormat, RecordDuration}; use console_viewer::ConsoleViewer; +use reqwest::StatusCode; use stack_trace::{Frame, StackTrace}; use chrono::{Local, SecondsFormat}; @@ -339,6 +340,184 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> Ok(()) } +fn sample_pyroscope(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> { + let mut output = RawFlamegraph(flamegraph::Flamegraph::new(config.show_line_numbers)); + let pyroscope_url = format!("{}/ingest", config.pyroscope_url.as_ref().unwrap()); + let pyroscope_app = config.pyroscope_app.as_ref().unwrap(); + let pyroscope_tags = config.pyroscope_tags.as_deref().unwrap_or(""); + let name = format!("{}{{{}}}", pyroscope_app, pyroscope_tags); + + let sampler = sampler::Sampler::new(pid, config)?; + + let lede = format!("{}{} ", style("py-spy").bold().green(), style(">").dim()); + + let report_interval = config.pyroscope_report_interval; + + let max_intervals = match &config.duration { + RecordDuration::Unlimited => { + println!("{}Sampling process {} times a second, sending aggregated pyroscope reports every {} samples. Press Control-C to exit.", lede, config.sampling_rate, report_interval); + None + } + RecordDuration::Seconds(sec) => { + println!("{}Sampling process {} times a second for {} seconds, sending aggregated pyroscope reports every {} samples. 
Press Control-C to exit.", lede, config.sampling_rate, sec, report_interval); + Some(sec * config.sampling_rate) + } + }; + + let client = reqwest::blocking::Client::new(); + + let mut errors = 0; + let mut intervals = 0; + let mut send_samples = 0; + let mut samples = 0; + println!(); + + let running = Arc::new(AtomicBool::new(true)); + let r = running.clone(); + ctrlc::set_handler(move || { + r.store(false, Ordering::SeqCst); + })?; + + let mut exit_message = "Stopped sampling because process exited"; + let mut last_late_message = std::time::Instant::now(); + + let mut start_ts = Local::now().timestamp(); + + for mut sample in sampler { + if let Some(delay) = sample.late { + if delay > Duration::from_secs(1) { + if config.hide_progress { + // display a message if we're late, but don't spam the log + let now = std::time::Instant::now(); + if now - last_late_message > Duration::from_secs(1) { + last_late_message = now; + println!("{}{:.2?} behind in sampling, results may be inaccurate. Try reducing the sampling rate", lede, delay) + } + } else { + println!("{:.2?} behind in sampling, results may be inaccurate. 
Try reducing the sampling rate.", delay); + } + } + } + + if !running.load(Ordering::SeqCst) { + exit_message = "Stopped sampling because Control-C pressed"; + break; + } + + intervals += 1; + if let Some(max_intervals) = max_intervals { + if intervals >= max_intervals { + exit_message = ""; + break; + } + } + + for trace in sample.traces.iter_mut() { + if !(config.include_idle || trace.active) { + continue; + } + + if config.gil_only && !trace.owns_gil { + continue; + } + + if config.include_thread_ids { + let threadid = trace.format_threadid(); + trace.frames.push(Frame { + name: format!("thread ({})", threadid), + filename: String::from(""), + module: None, + short_filename: None, + line: 0, + locals: None, + }); + } + + if let Some(process_info) = trace.process_info.as_ref().map(|x| x) { + trace.frames.push(process_info.to_frame()); + let mut parent = process_info.parent.as_ref(); + while parent.is_some() { + if let Some(process_info) = parent { + trace.frames.push(process_info.to_frame()); + parent = process_info.parent.as_ref(); + } + } + } + + samples += 1; + output.increment(&trace)?; + } + + send_samples += 1; + if send_samples >= report_interval { + let mut body: Vec = Vec::new(); + output.write(&mut body)?; + let res = client + .post(&pyroscope_url) + .query(&[ + ("from", start_ts.to_string()), + ("until", Local::now().timestamp().to_string()), + ("name", name.clone()), + ("sampleRate", config.sampling_rate.to_string()), + ]) + .body(body) + .send()?; + start_ts = Local::now().timestamp(); + output = RawFlamegraph(flamegraph::Flamegraph::new(config.show_line_numbers)); + send_samples = 0; + + if res.status() != StatusCode::OK { + println!( + "{}An error occurred while sending data to pyroscope: {:#?}", + lede, res + ) + } else { + println!("{}Sent pyroscope report!", lede); + } + } + + if let Some(sampling_errors) = sample.sampling_errors { + for (pid, e) in sampling_errors { + warn!("Failed to get stack trace from {}: {}", pid, e); + errors += 1; + } + 
} + } + + let mut body: Vec = Vec::new(); + output.write(&mut body)?; + let res = client + .post(&pyroscope_url) + .query(&[ + ("from", start_ts.to_string()), + ("until", Local::now().timestamp().to_string()), + ("name", name.clone()), + ("sampleRate", config.sampling_rate.to_string()), + ]) + .body(body) + .send()?; + + if res.status() != StatusCode::OK { + println!( + "{}An error occurred while sending data to pyroscope: {:#?}", + lede, res + ) + } else { + println!("{}Sent final pyroscope report!", lede); + } + + if !exit_message.is_empty() { + println!("\n{}{}", lede, exit_message); + } + + println!( + "{}Reported data to '{}'. Samples: {} Errors: {}", + lede, pyroscope_url, samples, errors + ); + + Ok(()) +} + fn run_spy_command(pid: remoteprocess::Pid, config: &config::Config) -> Result<(), Error> { match config.command.as_ref() { "dump" => { @@ -350,6 +529,9 @@ fn run_spy_command(pid: remoteprocess::Pid, config: &config::Config) -> Result<( "top" => { sample_console(pid, config)?; } + "pyroscope" => { + sample_pyroscope(pid, config)?; + } _ => { // shouldn't happen return Err(format_err!("Unknown command {}", config.command)); From b6632e8ed04333dfb363620d2905a57fe53bc6d8 Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Tue, 21 Feb 2023 12:40:58 +0100 Subject: [PATCH 3/9] Cleanup --- src/main.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main.rs b/src/main.rs index e053f03b..5ecacce7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -455,10 +455,10 @@ fn sample_pyroscope(pid: remoteprocess::Pid, config: &Config) -> Result<(), Erro let res = client .post(&pyroscope_url) .query(&[ - ("from", start_ts.to_string()), - ("until", Local::now().timestamp().to_string()), - ("name", name.clone()), - ("sampleRate", config.sampling_rate.to_string()), + ("from", &start_ts.to_string()), + ("until", &Local::now().timestamp().to_string()), + ("name", &name), + ("sampleRate", &config.sampling_rate.to_string()), ]) .body(body) 
.send()?; @@ -489,10 +489,10 @@ fn sample_pyroscope(pid: remoteprocess::Pid, config: &Config) -> Result<(), Erro let res = client .post(&pyroscope_url) .query(&[ - ("from", start_ts.to_string()), - ("until", Local::now().timestamp().to_string()), - ("name", name.clone()), - ("sampleRate", config.sampling_rate.to_string()), + ("from", &start_ts.to_string()), + ("until", &Local::now().timestamp().to_string()), + ("name", &name), + ("sampleRate", &config.sampling_rate.to_string()), ]) .body(body) .send()?; From 48f08b2331a548691c3cab4b8a2bb93eb07d335f Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Tue, 21 Feb 2023 12:55:16 +0100 Subject: [PATCH 4/9] Add docs --- README.md | 18 ++++++++++++++++-- src/config.rs | 3 +-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 713eea98..92e13242 100755 --- a/README.md +++ b/README.md @@ -36,8 +36,8 @@ can be installed with ```apk add py-spy --update-cache --repository http://dl-3. ## Usage py-spy works from the command line and takes either the PID of the program you want to sample from -or the command line of the python program you want to run. py-spy has three subcommands -```record```, ```top``` and ```dump```: +or the command line of the python program you want to run. py-spy has four subcommands +```record```, ```pyroscope```, ```top``` and ```dump```: ### record @@ -61,6 +61,20 @@ See ```py-spy record --help``` for information on other options including changi the sampling rate, filtering to only include threads that hold the GIL, profiling native C extensions, showing thread-ids, profiling subprocesses and more. +### pyroscope + +py-spy supports sending profiles to a [pyroscope](https://pyroscope.io) instance using the ```pyroscope``` command. 
For example, you can profile your python process by running: + +``` bash +py-spy pyroscope -si --pyroscope_url http://localhost:4040 --pyroscope_app app --pyroscope_tags 'a,b' --pyroscope_report_interval 1000 python myprogram.py +``` + +The pyroscope reporting interval is relative to the number of samples, for example if the sampling rate is 100 and the report interval is 1000, an aggregated report is uploaded to pyroscope every 1000 samples, aka every 10 seconds. + +See ```py-spy pyroscope --help``` for information on other options including changing +the sampling rate, reporting interval, filtering to only include threads that hold the GIL, profiling native C extensions, +showing thread-ids, profiling subprocesses and more. + ### top Top shows a live view of what functions are taking the most time in your python program, similar diff --git a/src/config.rs b/src/config.rs index 98c7790e..a6a570cb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -294,7 +294,6 @@ impl Config { .arg(full_filenames.clone()) .arg( Arg::new("url") - .short('y') .long("pyroscope_url") .value_name("http://localhost:4040") .help("Pyroscope URL") @@ -321,7 +320,7 @@ impl Config { Arg::new("report_interval") .long("pyroscope_report_interval") .value_name("report_interval") - .help("Pyroscope report interval") + .help("Pyroscope reporting interval, will send an aggregated report as soon as this many samples are collected.") .takes_value(true) .required(true), ) From 783fee8500ca6554e9a3ba76cd6cb3d39067dfe9 Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Fri, 2 Jun 2023 21:44:18 +0200 Subject: [PATCH 5/9] Fix --- src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index bdc1278a..335d80c7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -489,7 +489,7 @@ impl Config { config.lineno = if matches.occurrences_of("nolineno") > 0 { LineNo::NoLine } else if matches.occurrences_of("function") > 0 { - LineNo::FirstLineNo + LineNo::First }
else { LineNo::LastInstruction }; From 069764aa947630c1a6a26010fbad0a70bb6f0d26 Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Tue, 7 May 2024 14:00:59 +0200 Subject: [PATCH 6/9] Fmt --- src/config.rs | 14 +++++++------- src/coredump.rs | 2 +- src/cython.rs | 2 +- src/main.rs | 4 ++-- src/python_data_access.rs | 2 +- src/sampler.rs | 6 +++--- src/speedscope.rs | 2 +- src/timer.rs | 1 - 8 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/config.rs b/src/config.rs index 335d80c7..42a3b66c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -537,7 +537,7 @@ impl Config { } config.subprocesses = matches.occurrences_of("subprocesses") > 0; - config.command = subcommand.to_owned(); + subcommand.clone_into(&mut config.command); // options that can be shared between subcommands config.pid = matches @@ -639,14 +639,14 @@ mod tests { ); // test out overriding these params by setting flags - assert_eq!(config.include_idle, false); - assert_eq!(config.gil_only, false); - assert_eq!(config.include_thread_ids, false); + assert!(!config.include_idle); + assert!(!config.gil_only); + assert!(!config.include_thread_ids); let config_flags = get_config("py-spy r -p 1234 -o foo --idle --gil --threads").unwrap(); - assert_eq!(config_flags.include_idle, true); - assert_eq!(config_flags.gil_only, true); - assert_eq!(config_flags.include_thread_ids, true); + assert!(config_flags.include_idle); + assert!(config_flags.gil_only); + assert!(config_flags.include_thread_ids); } #[test] diff --git a/src/coredump.rs b/src/coredump.rs index 51a66361..2cad9620 100644 --- a/src/coredump.rs +++ b/src/coredump.rs @@ -435,7 +435,7 @@ mod test { // we won't have the python binary for the core dump here, // so we can't (yet) figure out the interpreter address & version. 
// Manually specify here to test out instead - let core = CoreDump::new(&get_coredump_path("python_3_9_threads")).unwrap(); + let core = CoreDump::new(get_coredump_path("python_3_9_threads")).unwrap(); let version = Version { major: 3, minor: 9, diff --git a/src/cython.rs b/src/cython.rs index 3497f614..5b3e0c83 100644 --- a/src/cython.rs +++ b/src/cython.rs @@ -34,7 +34,7 @@ impl SourceMaps { if let Some(map) = self.maps.get(&frame.filename) { if let Some(map) = map { if let Some((file, line)) = map.lookup(line) { - frame.filename = file.clone(); + frame.filename.clone_from(file); frame.line = *line as i32; } } diff --git a/src/main.rs b/src/main.rs index 6bd7fce8..8d232939 100644 --- a/src/main.rs +++ b/src/main.rs @@ -434,7 +434,7 @@ fn sample_pyroscope(pid: remoteprocess::Pid, config: &Config) -> Result<(), Erro }); } - if let Some(process_info) = trace.process_info.as_ref().map(|x| x) { + if let Some(process_info) = trace.process_info.as_ref() { trace.frames.push(process_info.to_frame()); let mut parent = process_info.parent.as_ref(); while parent.is_some() { @@ -446,7 +446,7 @@ fn sample_pyroscope(pid: remoteprocess::Pid, config: &Config) -> Result<(), Erro } samples += 1; - output.increment(&trace)?; + output.increment(trace)?; } send_samples += 1; diff --git a/src/python_data_access.rs b/src/python_data_access.rs index 4249e40a..e6c6e018 100644 --- a/src/python_data_access.rs +++ b/src/python_data_access.rs @@ -469,7 +469,7 @@ pub mod tests { }; unsafe { let ptr = &mut ret as *mut AllocatedPyASCIIObject as *mut u8; - let dst = ptr.offset(std::mem::size_of::() as isize); + let dst = ptr.add(std::mem::size_of::()); copy_nonoverlapping(bytes.as_ptr(), dst, bytes.len()); } ret diff --git a/src/sampler.rs b/src/sampler.rs index d9831abc..6d228539 100644 --- a/src/sampler.rs +++ b/src/sampler.rs @@ -56,7 +56,7 @@ impl Sampler { spy } Err(e) => { - if initialized_tx.send(Err(e)).is_err() {} + initialized_tx.send(Err(e)).is_err(); return; } }; @@ -219,7 +219,7 @@ 
impl Sampler { let process = process_info .entry(pid) .or_insert_with(|| get_process_info(pid, &spies).map(|p| Arc::new(*p))); - trace.process_info = process.clone(); + trace.process_info.clone_from(process); } // Send the collected info back @@ -308,7 +308,7 @@ impl PythonSpyThread { } Err(e) => { warn!("Failed to profile python from process {}: {}", pid, e); - if initialized_tx.send(Err(e)).is_err() {} + initialized_tx.send(Err(e)).is_err(); return; } }; diff --git a/src/speedscope.rs b/src/speedscope.rs index 8d8a4023..90b4c051 100644 --- a/src/speedscope.rs +++ b/src/speedscope.rs @@ -233,7 +233,7 @@ impl Stats { self.samples .entry(key) - .or_insert_with(std::vec::Vec::new) + .or_default() .push(frame_indices); let subprocesses = self.config.subprocesses; self.thread_name_map.entry(key).or_insert_with(|| { diff --git a/src/timer.rs b/src/timer.rs index f88486d3..8a86e8d4 100644 --- a/src/timer.rs +++ b/src/timer.rs @@ -2,7 +2,6 @@ use std::time::{Duration, Instant}; #[cfg(windows)] use winapi::um::timeapi; -use rand; use rand_distr::{Distribution, Exp}; /// Timer is an iterator that sleeps an appropriate amount of time between iterations From 306cd38037b8d033b9e5ff8e055f38013938934a Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Tue, 7 May 2024 15:01:06 +0200 Subject: [PATCH 7/9] Forward signals to child process --- Cargo.lock | 64 +++++++++++++++++++++++++++++++++++++++----------- Cargo.toml | 2 ++ src/main.rs | 13 ++++++++++ src/sampler.rs | 4 ++-- 4 files changed, 67 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index db541c50..9d98cd69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,7 +168,7 @@ version = "0.59.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr", "clang-sys", "clap 2.34.0", @@ -191,7 +191,7 @@ version = "0.60.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr", "clang-sys", "clap 3.2.25", @@ -214,6 +214,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + [[package]] name = "bumpalo" version = "3.13.0" @@ -253,6 +259,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "chrono" version = "0.4.26" @@ -287,7 +299,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags", + "bitflags 1.3.2", "strsim 0.8.0", "textwrap 0.11.0", "unicode-width", @@ -301,7 +313,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "atty", - "bitflags", + "bitflags 1.3.2", "clap_derive 3.2.25", "clap_lex 0.2.4", "indexmap", @@ -331,7 +343,7 @@ checksum = "72394f3339a76daf211e57d4bcb374410f3965dcc606dd0e03738c7888766980" dependencies = [ "anstream", "anstyle", - "bitflags", + "bitflags 1.3.2", "clap_lex 0.5.0", "strsim 0.10.0", ] @@ -989,9 +1001,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.144" +version = "0.2.154" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" [[package]] name = "libloading" @@ -1158,7 +1170,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" dependencies = [ "autocfg", - "bitflags", + "bitflags 1.3.2", "cfg-if", "libc", ] @@ -1169,12 +1181,24 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if", "libc", "static_assertions", ] +[[package]] +name = "nix" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +dependencies = [ + "bitflags 2.5.0", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -1243,7 +1267,7 @@ version = "0.10.54" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69b3f656a17a6cbc115b5c7a40c616947d213ba182135b014d6051b73ab6f019" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if", "foreign-types", "libc", @@ -1433,6 +1457,7 @@ dependencies = [ "log", "lru", "memmap", + "nix 0.28.0", "proc-maps 0.2.1", "py-spy-testdata", "rand", @@ -1443,6 +1468,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", + "signal-hook", "tempfile", "termios", "tokio", @@ -1531,7 +1557,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -1540,7 +1566,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -1647,7 +1673,7 @@ version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno 0.3.1", "io-lifetimes", "libc", @@ -1702,7 +1728,7 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" dependencies = [ - "bitflags", + "bitflags 1.3.2", "core-foundation", "core-foundation-sys", "libc", @@ -1765,6 +1791,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +[[package]] +name = "signal-hook" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" +dependencies = [ + "libc", + "signal-hook-registry", +] + [[package]] name = "signal-hook-registry" version = "1.4.1" diff --git a/Cargo.toml b/Cargo.toml index bf248c74..aece43f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,8 @@ remoteprocess = {version="0.4.11", features=["unwind"]} chrono = "0.4.19" reqwest = { version = "0.11", features = ["blocking"] } tokio = { version = "1", features = ["full"] } +signal-hook = "0.3.17" +nix = {version="0.28.0", features = ["signal"] } [dev-dependencies] py-spy-testdata = "0.1.0" diff --git a/src/main.rs b/src/main.rs index 8d232939..0a37733e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -37,7 +37,11 @@ use console::style; use config::{Config, FileFormat, RecordDuration}; use console_viewer::ConsoleViewer; +use libc::{SIGINT, SIGTERM}; use reqwest::StatusCode; +use signal_hook::iterator::Signals; +use nix::unistd::Pid; +use nix::sys::signal::{self, Signal}; use 
stack_trace::{Frame, StackTrace}; use chrono::{Local, SecondsFormat}; @@ -597,6 +601,15 @@ fn pyspy_main() -> Result<(), Error> { .spawn() .map_err(|e| format_err!("Failed to create process '{}': {}", subprocess[0], e))?; + let mut signals = Signals::new(&[SIGINT, SIGTERM])?; + let child_pid = command.id(); + std::thread::spawn(move || { + for sig in signals.forever() { + signal::kill(Pid::from_raw(child_pid.try_into().unwrap()), Signal::try_from(sig).unwrap()).unwrap(); + println!("Received signal {:?}", sig); + } + }); + #[cfg(target_os = "macos")] { // sleep just in case: https://jvns.ca/blog/2018/01/28/mac-freeze/ diff --git a/src/sampler.rs b/src/sampler.rs index 6d228539..7fb66d3d 100644 --- a/src/sampler.rs +++ b/src/sampler.rs @@ -56,7 +56,7 @@ impl Sampler { spy } Err(e) => { - initialized_tx.send(Err(e)).is_err(); + let _ = initialized_tx.send(Err(e)).is_err(); return; } }; @@ -308,7 +308,7 @@ impl PythonSpyThread { } Err(e) => { warn!("Failed to profile python from process {}: {}", pid, e); - initialized_tx.send(Err(e)).is_err(); + let _ = initialized_tx.send(Err(e)).is_err(); return; } }; From b06bd8ee47a82e357d8fa98a4102a0edf88b7797 Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Tue, 7 May 2024 15:10:39 +0200 Subject: [PATCH 8/9] Fix --- src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.rs b/src/main.rs index 9e679a65..bcf96490 100644 --- a/src/main.rs +++ b/src/main.rs @@ -464,6 +464,7 @@ fn sample_pyroscope(pid: remoteprocess::Pid, config: &Config) -> Result<(), Erro short_filename: None, line: 0, locals: None, + is_entry: true }); } From 4df5e714422690a12be44f415446813ad0dd0919 Mon Sep 17 00:00:00 2001 From: Alexander Pankratov Date: Tue, 7 May 2024 20:16:49 +0200 Subject: [PATCH 9/9] Fix: enable stdout in pyroscope mode --- src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index 1abe469c..d5f588e4 100644 --- a/src/config.rs +++ b/src/config.rs @@ -549,7 +549,7 @@ 
impl Config { config.native = matches.occurrences_of("native") > 0; } - config.capture_output = config.command != "record" || matches.occurrences_of("capture") > 0; + config.capture_output = (config.command != "record" && config.command != "pyroscope" ) || matches.occurrences_of("capture") > 0; if !config.capture_output { config.hide_progress = true; }