Skip to content

Commit

Permalink
Crashtracker receiver is spawned on crash (#692)
Browse files Browse the repository at this point in the history
Fixes a few things going on with crashtracker
* Long-lived child processes are problematic for wait
* Can create zombies
* Doesn't correctly handle certain signals
* General cleanup

---------

Co-authored-by: Kevin Gosse <[email protected]>
Co-authored-by: Daniel Schwartz-Narbonne <[email protected]>
Co-authored-by: bwoebi <[email protected]>
  • Loading branch information
4 people authored Oct 31, 2024
1 parent d92f667 commit 4f324e7
Show file tree
Hide file tree
Showing 32 changed files with 1,722 additions and 497 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions bin_tests/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ ddcommon = { path = "../ddcommon" }
tempfile = "3.3"
serde_json = { version = "1.0" }
strum = { version = "0.26.2", features = ["derive"] }
libc = "0.2"
nix = { version = "0.27.1", features = ["socket"] }

[lib]
bench = false
Expand All @@ -33,7 +35,3 @@ bench = false
[[bin]]
name = "crashtracker_receiver"
bench = false

[[bin]]
name = "crashtracker_unix_socket_receiver"
bench = false
79 changes: 37 additions & 42 deletions bin_tests/src/bin/crashtracker_bin_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,45 +12,51 @@ fn main() -> anyhow::Result<()> {
#[cfg(unix)]
mod unix {
use anyhow::Context;
use bin_tests::ReceiverType;
use std::{env, fs::File, str::FromStr};
use bin_tests::modes::behavior::get_behavior;
use std::env;
use std::path::Path;

use datadog_crashtracker::{
self as crashtracker, CrashtrackerConfiguration, CrashtrackerMetadata,
CrashtrackerReceiverConfig,
};
use ddcommon::{tag, Endpoint};

const TEST_COLLECTOR_TIMEOUT_MS: u32 = 10_000;

#[inline(never)]
unsafe fn deref_ptr(p: *mut u8) {
*std::hint::black_box(p) = std::hint::black_box(1);
}

pub fn main() -> anyhow::Result<()> {
let mut args = env::args().skip(1);
let mode = args.next().context("Unexpected number of arguments")?;
let output_url = args.next().context("Unexpected number of arguments")?;
let receiver_binary = args.next().context("Unexpected number of arguments")?;
let unix_socket_receiver_binary = args.next().context("Unexpected number of arguments")?;
let stderr_filename = args.next().context("Unexpected number of arguments")?;
let stdout_filename = args.next().context("Unexpected number of arguments")?;
let socket_path = args.next().context("Unexpected number of arguments")?;
let output_dir = args.next().context("Unexpected number of arguments")?;
let mode_str = args.next().context("Unexpected number of arguments")?;
anyhow::ensure!(args.next().is_none(), "unexpected extra arguments");

let wait_for_receiver = true;
let stderr_filename = format!("{output_dir}/out.stderr");
let stdout_filename = format!("{output_dir}/out.stdout");
let output_dir: &Path = output_dir.as_ref();

let endpoint = if output_url.is_empty() {
None
} else {
Some(Endpoint::from_slice(&output_url))
};

let config = CrashtrackerConfiguration {
// The configuration can be modified by a Behavior (testing plan), so it is mut here.
// Unlike a normal harness, in this harness tests are run in individual processes, so race
// issues are avoided.
let mut config = CrashtrackerConfiguration {
additional_files: vec![],
create_alt_stack: true,
use_alt_stack: true,
resolve_frames: crashtracker::StacktraceCollection::WithoutSymbols,
endpoint,
wait_for_receiver,
timeout_ms: TEST_COLLECTOR_TIMEOUT_MS,
};

let metadata = CrashtrackerMetadata {
Expand All @@ -64,44 +70,33 @@ mod unix {
tag!("language", "native"),
],
};
match ReceiverType::from_str(&mode)? {
ReceiverType::ChildProcessStdin => {
crashtracker::init_with_receiver(
config,
CrashtrackerReceiverConfig::new(
vec![],
env::vars().collect(),
receiver_binary,
Some(stderr_filename),
Some(stdout_filename),
)?,
metadata,
)?;
}
ReceiverType::UnixSocket => {
// Fork a unix socket receiver
// For now, this exits when a single message is received.
// When the listener is updated, we'll need to keep the handle and kill the receiver
// to avoid leaking a process.
std::process::Command::new(unix_socket_receiver_binary)
.stderr(File::create(stderr_filename)?)
.stdout(File::create(stdout_filename)?)
.arg(&socket_path)
.spawn()
.context("failed to spawn unix receiver")?;

// Wait long enough for the receiver to establish the socket
std::thread::sleep(std::time::Duration::from_secs(1));

crashtracker::init_with_unix_socket(config, &socket_path, metadata)?;
}
}

// Set the behavior of the test, run setup, and do the pre-init test
let behavior = get_behavior(&mode_str);
behavior.setup(output_dir, &mut config)?;
behavior.pre(output_dir)?;

crashtracker::init(
config,
CrashtrackerReceiverConfig::new(
vec![],
env::vars().collect(),
receiver_binary,
Some(stderr_filename),
Some(stdout_filename),
)?,
metadata,
)?;

// Conduct the post-init test
behavior.post(output_dir)?;

crashtracker::begin_op(crashtracker::OpTypes::ProfilerCollectingSample)?;
unsafe {
deref_ptr(std::ptr::null_mut::<u8>());
}
crashtracker::end_op(crashtracker::OpTypes::ProfilerCollectingSample)?;

Ok(())
}
}
15 changes: 0 additions & 15 deletions bin_tests/src/bin/crashtracker_unix_socket_receiver.rs

This file was deleted.

11 changes: 3 additions & 8 deletions bin_tests/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

pub mod modes;

use std::{collections::HashMap, env, ops::DerefMut, path::PathBuf, process, sync::Mutex};

use anyhow::Ok;
use once_cell::sync::OnceCell;
use strum::{Display, EnumString};

/// This crate implements an abstraction over compilation with cargo with the purpose
/// of testing full binaries or dynamic libraries, instead if just rust static libraries.
/// of testing full binaries or dynamic libraries, instead of just rust static libraries.
///
/// The main entrypoint is `fn build_artifacts` which takes a list of artifacts to build,
/// either executable crates, cdylib, or extra binaries, invokes cargo and return the path
Expand All @@ -28,12 +29,6 @@ pub enum ArtifactType {
Bin,
}

#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, EnumString, Display)]
pub enum ReceiverType {
ChildProcessStdin,
UnixSocket,
}

#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BuildProfile {
Debug,
Expand Down
91 changes: 91 additions & 0 deletions bin_tests/src/modes/behavior.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

#![cfg(unix)]
use anyhow::{Context, Result};
use datadog_crashtracker::CrashtrackerConfiguration;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::Path;
use std::sync::atomic::{AtomicPtr, Ordering};

use crate::modes::unix::*;

/// Defines the additional behavior for a given crashtracking test
pub trait Behavior {
fn setup(&self, output_dir: &Path, config: &mut CrashtrackerConfiguration) -> Result<()>;
fn pre(&self, output_dir: &Path) -> Result<()>;
fn post(&self, output_dir: &Path) -> Result<()>;
}

pub fn fileat_content_equals(dir: &Path, filename: &str, contents: &str) -> anyhow::Result<bool> {
let filepath = dir.join(filename);
file_content_equals(&filepath, contents)
}

pub fn file_content_equals(filepath: &Path, contents: &str) -> anyhow::Result<bool> {
let file_contents = std::fs::read_to_string(filepath)
.with_context(|| format!("Failed to read file: {}", filepath.display()))?;
Ok(file_contents.trim() == contents)
}

pub fn file_append_msg(filepath: &Path, contents: &str) -> Result<()> {
let mut file = OpenOptions::new()
.create(true)
.append(true)
.open(filepath)
.with_context(|| format!("Failed to open file: {}", filepath.display()))?;

file.write_all(contents.as_bytes())
.with_context(|| format!("Failed to write to file: {}", filepath.display()))?;

Ok(())
}

pub fn atom_to_clone<T: Clone>(atom: &AtomicPtr<T>) -> Result<T> {
let ptr = atom.load(Ordering::SeqCst);
anyhow::ensure!(!ptr.is_null(), "Pointer was null");

// If not null, clone the referenced value
unsafe {
ptr.as_ref()
.cloned()
.ok_or_else(|| anyhow::anyhow!("Failed to clone"))
}
}

pub fn set_atomic<T>(atom: &AtomicPtr<T>, value: T) {
let box_ptr = Box::into_raw(Box::new(value));
let old = atom.swap(box_ptr, Ordering::SeqCst);
if !old.is_null() {
unsafe {
// Drop the previous value safely
let _ = Box::from_raw(old);
}
}
}

pub fn removeat_permissive(dir: &Path, filename: &str) {
let filepath = dir.join(filename);
remove_permissive(&filepath);
}

pub fn remove_permissive(filepath: &Path) {
// Removes the file if it exists. If it doesn't exist, it's not an error or anything.
let _ = std::fs::remove_file(filepath);
}

pub fn get_behavior(mode_str: &str) -> Box<dyn Behavior> {
match mode_str {
"donothing" => Box::new(test_000_donothing::Test),
"sigpipe" => Box::new(test_001_sigpipe::Test),
"sigchld" => Box::new(test_002_sigchld::Test),
"sigchld_exec" => Box::new(test_003_sigchld_with_exec::Test),
"donothing_sigstack" => Box::new(test_004_donothing_sigstack::Test),
"sigpipe_sigstack" => Box::new(test_005_sigpipe_sigstack::Test),
"sigchld_sigstack" => Box::new(test_006_sigchld_sigstack::Test),
"chained" => Box::new(test_007_chaining::Test),
"fork" => Box::new(test_008_fork::Test),
_ => panic!("Unknown mode: {}", mode_str),
}
}
5 changes: 5 additions & 0 deletions bin_tests/src/modes/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0
pub mod behavior;
#[cfg(unix)]
pub mod unix;
11 changes: 11 additions & 0 deletions bin_tests/src/modes/unix/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0
pub mod test_000_donothing;
pub mod test_001_sigpipe;
pub mod test_002_sigchld;
pub mod test_003_sigchld_with_exec;
pub mod test_004_donothing_sigstack;
pub mod test_005_sigpipe_sigstack;
pub mod test_006_sigchld_sigstack;
pub mod test_007_chaining;
pub mod test_008_fork;
29 changes: 29 additions & 0 deletions bin_tests/src/modes/unix/test_000_donothing.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0
//
// This is a baseline test for the crashtracker. It doesn't really do anything except run the
// crashtracker in a fairly normal configuration, generates a crash, and then ensures that the
// crashtracker has recorded the situation as expected.
use crate::modes::behavior::Behavior;
use datadog_crashtracker::CrashtrackerConfiguration;
use std::path::Path;

pub struct Test;

impl Behavior for Test {
fn setup(
&self,
_output_dir: &Path,
_config: &mut CrashtrackerConfiguration,
) -> anyhow::Result<()> {
Ok(())
}

fn pre(&self, _output_dir: &Path) -> anyhow::Result<()> {
Ok(())
}

fn post(&self, _output_dir: &Path) -> anyhow::Result<()> {
Ok(())
}
}
Loading

0 comments on commit 4f324e7

Please sign in to comment.