Skip to content

Commit

Permalink
add benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
magnetophon committed Dec 31, 2024
1 parent 7cac2cc commit 1e1d188
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 70 deletions.
23 changes: 23 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,7 @@ strip = "symbols"
inherits = "release"
debug = true
strip = "none"

[dev-dependencies]
core_affinity = "0.8.0"
chrono = "0.4"
251 changes: 181 additions & 70 deletions src/svf_simper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -698,86 +698,197 @@ where
#[cfg(test)]
mod tests {
use super::*;
use std::hint::black_box;
use std::time::Instant;

/// Returns a human-readable CPU model string for the benchmark header.
///
/// On Linux this reads `/proc/cpuinfo` and returns the trimmed value of the
/// first line that starts with `model name`. If that line has no `:`
/// separator, `"Unknown"` is returned. On other platforms, or when the file
/// cannot be read or has no such line, `"Unknown CPU"` is returned.
fn get_cpu_info() -> String {
    #[cfg(target_os = "linux")]
    {
        if let Ok(contents) = std::fs::read_to_string("/proc/cpuinfo") {
            if let Some(model_line) = contents.lines().find(|l| l.starts_with("model name")) {
                // Split only at the first ':' so a model name that itself
                // contains a colon is returned whole instead of truncated.
                return model_line
                    .split_once(':')
                    .map_or("Unknown", |(_, value)| value)
                    .trim()
                    .to_string();
            }
        }
    }
    "Unknown CPU".to_string()
}

/// Prints whether the running CPU supports AVX-512F, AVX2 and SSE2.
///
/// Only produces output when compiled for `x86_64`; on every other target
/// architecture the function body is empty.
fn print_cpu_features() {
    #[cfg(target_arch = "x86_64")]
    {
        // Query all features up front, then report them in one place.
        let has_avx512 = is_x86_feature_detected!("avx512f");
        let has_avx2 = is_x86_feature_detected!("avx2");
        let has_sse2 = is_x86_feature_detected!("sse2");

        println!("\nCPU Features:");
        println!(" AVX512: {}", has_avx512);
        println!(" AVX2: {}", has_avx2);
        println!(" SSE2: {}", has_sse2);
    }
}

fn pin_to_performance_core() {
#[cfg(target_os = "linux")]
{
use core_affinity::CoreId;
if let Some(core_ids) = core_affinity::get_core_ids() {
println!("\nAvailable cores: {}", core_ids.len());
if let Some(core_id) = core_ids.first() {
core_affinity::set_for_current(*core_id);
println!("Pinned to core {}", core_id.id);
}
}
}
}

#[test]
fn test_svf_simper() {
fn benchmark_filter() {
println!("\nBenchmarking on: {}", get_cpu_info());
println!("Current Date and Time (UTC): {}", chrono::Utc::now());
if let Ok(user) = std::env::var("USER") {
println!("Current User's Login: {}", user);
}

print_cpu_features();
pin_to_performance_core();

let sample_rate = 48000.0;
let test_freqs = [110.0, 220.0, 440.0, 880.0, 1760.0, 3520.0];
let resonances = [0.0, 0.5, 0.7, 0.9];
let cutoff = 1000.0;
let resonance = 0.5;

for &resonance in &resonances {
let mut filter = SVFSimper::<4>::new(cutoff, resonance, sample_rate);

// Verify k coefficient matches expected value
let expected_k = 2.0 * (1.0 - resonance);
assert!(
(filter.k[0] - expected_k).abs() < 1e-6,
"k coefficient incorrect for resonance {}: expected {}, got {}",
resonance,
expected_k,
filter.k[0]
);

// Test frequency response
for &freq in &test_freqs {
let num_samples = (sample_rate / freq * 10.0) as usize;
let mut power_sum_in = 0.0;
let mut power_sum_out = 0.0;

// Warm up filter
for i in 0..(2.0 * sample_rate / freq) as usize {
let t = i as f32 / sample_rate;
let input = (2.0 * std::f32::consts::PI * freq * t).sin();
filter.process(Simd::splat(input));
}
// Test each lane width with both generic and SIMD implementations
println!("\n=== LANES=4 ===");
benchmark_comparison::<4>(sample_rate, cutoff, resonance);

// Measure response
for i in 0..num_samples {
let t = i as f32 / sample_rate;
let input = (2.0 * std::f32::consts::PI * freq * t).sin();
let (_, lp) = filter.process(Simd::splat(input));
let output = lp.to_array()[0];
println!("\n=== LANES=8 ===");
benchmark_comparison::<8>(sample_rate, cutoff, resonance);

power_sum_in += input * input;
power_sum_out += output * output;
}
println!("\n=== LANES=16 ===");
benchmark_comparison::<16>(sample_rate, cutoff, resonance);
}

let rms_in = (power_sum_in / num_samples as f32).sqrt();
let rms_out = (power_sum_out / num_samples as f32).sqrt();
let ratio = rms_out / rms_in;
let db = 20.0 * ratio.log10();

// Verify filter behavior based on resonance and frequency
if freq < cutoff / 2.0 {
// Check passband (should have minimal attenuation)
assert!(
db > -3.0,
"Too much attenuation in passband: {} dB at {} Hz (resonance {})",
db,
freq,
resonance
);
} else if freq > cutoff * 2.0 {
// Check stopband (should have significant attenuation)
assert!(
db < -12.0,
"Insufficient attenuation in stopband: {} dB at {} Hz (resonance {})",
db,
freq,
resonance
);
}
fn benchmark_comparison<const LANES: usize>(sample_rate: f32, cutoff: f32, resonance: f32)
where
LaneCount<LANES>: SupportedLaneCount,
{
// Create two filters - one forced to generic, one using selected SIMD
let mut filter_generic = SVFSimper::<LANES>::new(cutoff, resonance, sample_rate);
let mut filter_simd = SVFSimper::<LANES>::new(cutoff, resonance, sample_rate);

// For high resonance, verify peak at cutoff
if resonance > 0.7 && (freq as f32 - cutoff).abs() < cutoff * 0.1 {
assert!(
db > 0.0,
"Expected resonant peak near cutoff for resonance {}",
resonance
);
}
// Force generic implementation for comparison
filter_generic.dispatch = FilterDispatch::Generic;

let input = Simd::from_array([0.1; LANES]);

println!("Generic implementation:");
let generic_stats = run_benchmark(&mut filter_generic, input);

println!("\nSIMD implementation ({:?}):", filter_simd.dispatch);
let simd_stats = run_benchmark(&mut filter_simd, input);

println!("\nComparison:");
println!(
" Generic: {:.2}ns per sample",
generic_stats.median / LANES as f64
);
println!(
" SIMD: {:.2}ns per sample",
simd_stats.median / LANES as f64
);
println!(
" Speedup: {:.2}x",
generic_stats.median / simd_stats.median
);
}

/// Summary statistics over the timed runs of one benchmark.
///
/// All values are in nanoseconds per call to the benchmarked operation,
/// as produced by `run_benchmark`.
#[derive(Debug, Clone, Copy, PartialEq)]
struct BenchmarkStats {
    /// Median of the per-run timings.
    median: f64,
    /// Arithmetic mean of the per-run timings.
    mean: f64,
    /// Fastest run.
    min: f64,
    /// Slowest run.
    max: f64,
    /// Standard deviation of the per-run timings.
    std_dev: f64,
}

/// Times repeated calls to `filter.process(input)` and prints the results.
///
/// Performs one untimed warm-up pass of 1,000,000 calls, then 10 timed runs of
/// 1,000,000 calls each. Every run is reduced to an average in nanoseconds per
/// call; since one call handles a vector of `LANES` values, the printed
/// "per sample" figures divide that by `LANES`.
///
/// Returns the median, mean, minimum, maximum and (population) standard
/// deviation of the ten per-call timings, all in nanoseconds per call.
fn run_benchmark<const LANES: usize>(
    filter: &mut SVFSimper<LANES>,
    input: Simd<f32, LANES>,
) -> BenchmarkStats
where
    LaneCount<LANES>: SupportedLaneCount,
{
    let num_iterations = 1_000_000;
    let num_runs = 10;
    let mut durations = Vec::with_capacity(num_runs);

    // Warm up
    for _ in 0..num_iterations {
        // `black_box` keeps the optimizer from folding the call away.
        black_box(filter.process(black_box(input)));
    }

    // Actual benchmark runs
    for run in 1..=num_runs {
        let start = Instant::now();
        for _ in 0..num_iterations {
            black_box(filter.process(black_box(input)));
        }
        let duration = start.elapsed().as_nanos() as f64 / num_iterations as f64;
        durations.push(duration);
        println!("Run {}: {:.2}ns per {} samples", run, duration, LANES);
    }

    // `total_cmp` gives a total order on f64, so sorting cannot panic.
    durations.sort_by(f64::total_cmp);
    let median = if num_runs % 2 == 0 {
        (durations[num_runs / 2 - 1] + durations[num_runs / 2]) / 2.0
    } else {
        durations[num_runs / 2]
    };

    let mean = durations.iter().sum::<f64>() / num_runs as f64;
    // Population variance: divide by the number of runs, not runs - 1.
    let variance = durations
        .iter()
        .map(|x| (x - mean) * (x - mean))
        .sum::<f64>()
        / num_runs as f64;
    let std_dev = variance.sqrt();

    println!("\nSummary:");
    println!(
        " Median: {:.2}ns per {} samples ({:.2}ns per sample)",
        median,
        LANES,
        median / LANES as f64
    );
    println!(
        " Mean: {:.2}ns per {} samples ({:.2}ns per sample)",
        mean,
        LANES,
        mean / LANES as f64
    );
    println!(
        " Min: {:.2}ns per {} samples ({:.2}ns per sample)",
        durations[0],
        LANES,
        durations[0] / LANES as f64
    );
    println!(
        " Max: {:.2}ns per {} samples ({:.2}ns per sample)",
        durations[num_runs - 1],
        LANES,
        durations[num_runs - 1] / LANES as f64
    );
    println!(" Std Dev:{:.2}ns", std_dev);
    // samples/s = 1e9 / ns_per_sample, so Msamples/s = 1e3 / ns_per_sample.
    println!(
        " Throughput: {:.2}M samples/second",
        1000.0 / (median / LANES as f64)
    );

    BenchmarkStats {
        median,
        mean,
        min: durations[0],
        max: durations[num_runs - 1],
        std_dev,
    }
}
}
Expand Down

0 comments on commit 1e1d188

Please sign in to comment.