Skip to content

Commit

Permalink
maintenance
Browse files Browse the repository at this point in the history
set 2021 edition; update hound; revisit lpca perf
  • Loading branch information
carueda committed Jul 23, 2023
1 parent 65779a2 commit f308d04
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 129 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ keywords = [
"machine-learning",
]

edition = "2018"
edition = "2021"

# https://doc.rust-lang.org/cargo/reference/build-scripts.html
build = "build.rs"
Expand All @@ -31,7 +31,7 @@ byteorder = "1.3.4"
clap = { version = "3.2.22", features = ["derive"] }
colored = "2.0.0"
csv = "1.1"
hound = "3.4.0"
hound = "3.5.0"
itertools = "0.11.0"
lazy_static = "1.4.0"
libc = "0.2.62"
Expand Down
22 changes: 18 additions & 4 deletions benches/my_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,32 @@ mod lpca_c;
#[path = "../src/lpc/lpca_rs.rs"]
mod lpca_rs;

/*
input length=1440, prediction_order=36
lpca/lpca1_rs time: [49.762 µs 50.067 µs 50.391 µs]
lpca/lpca2_rs time: [49.010 µs 49.342 µs 49.693 µs]
lpca/lpca_c time: [12.267 µs 12.362 µs 12.474 µs]
Recall that lpca_c is the C version, compiled with the -ffast-math option,
while the Rust versions are built without any comparable setting.
*/
fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("lpca");
let input = lpca_rs::lpca_load_input("signal_frame.inputs").unwrap();
let frame = black_box(input.x);
let prediction_order = black_box(input.p);

//println!("input length={}, prediction_order={}", frame.len(), prediction_order);
println!(
"input length={}, prediction_order={}",
frame.len(),
prediction_order
);

let mut vector = vec![0f64; prediction_order + 1];
let mut reflex = vec![0f64; prediction_order + 1];
let mut pred = vec![0f64; prediction_order + 1];

c.bench_function("lpca1_rs", |b| {
group.bench_function("lpca1_rs", |b| {
b.iter(|| {
lpca_rs::lpca1(
&frame,
Expand All @@ -31,7 +45,7 @@ fn criterion_benchmark(c: &mut Criterion) {
})
});

c.bench_function("lpca2_rs", |b| {
group.bench_function("lpca2_rs", |b| {
b.iter(|| {
lpca_rs::lpca2(
&frame,
Expand All @@ -43,7 +57,7 @@ fn criterion_benchmark(c: &mut Criterion) {
})
});

c.bench_function("lpca_c", |b| {
group.bench_function("lpca_c", |b| {
b.iter(|| {
lpca_c::lpca(
&frame,
Expand Down
22 changes: 21 additions & 1 deletion justfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,30 @@ all: default clippy
list:
@just --list --unsorted

# Run check
check:
cargo check

# Run benchmarks (then open target/criterion/report/index.html)
bench:
cargo bench

# Run tests
test:
cargo test

# Run tests with --nocapture
test-nocapture *args='':
cargo test -- --nocapture {{args}}

# Run the program (e.g.: just run --help)
run *args='':
cargo run -- {{ args }}

# Clean
clean:
cargo clean

# Format source code
format:
cargo fmt
Expand All @@ -28,8 +44,12 @@ clippy:
release:
cargo build --release

# Build release with RUSTFLAGS="-C target-cpu=native"
release-native:
RUSTFLAGS="-C target-cpu=native" cargo build --release

# Install locally
install: release
install: release-native
cargo install --path .

# (cargo install --locked cargo-outdated)
Expand Down
11 changes: 9 additions & 2 deletions src/ecoz2_lib/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ extern crate libc;

pub mod lpca_c;

use std::error::Error;
use std::ffi::CStr;
use std::ffi::CString;
use std::os::raw::{c_float, c_long, c_ulong};
Expand Down Expand Up @@ -216,8 +217,13 @@ pub fn lpc_signals(
}
}

pub fn prd_show_file(prd_filename: PathBuf, show_reflections: bool, from: usize, to: usize) {
let prd_filename_c_string = CString::new(prd_filename.to_str().unwrap()).unwrap();
pub fn prd_show_file(
prd_filename: PathBuf,
show_reflections: bool,
from: usize,
to: usize,
) -> Result<(), Box<dyn Error>> {
let prd_filename_c_string = CString::new(prd_filename.to_str().unwrap())?;

unsafe {
let filename = prd_filename_c_string.as_ptr() as *const i8;
Expand All @@ -229,6 +235,7 @@ pub fn prd_show_file(prd_filename: PathBuf, show_reflections: bool, from: usize,
to as c_int,
)
}
Ok(())
}

#[no_mangle]
Expand Down
120 changes: 35 additions & 85 deletions src/lpc/lpca_rs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub fn lpca(x: &[f64], p: usize, r: &mut [f64], rc: &mut [f64], a: &mut [f64]) -
pub fn lpca1(x: &[f64], p: usize, r: &mut [f64], rc: &mut [f64], a: &mut [f64]) -> (i32, f64) {
let n = x.len();

// this is the expensive part:
for i in 0..=p {
let mut sum = 0.0f64;
for k in 0..n - i {
Expand All @@ -49,11 +50,15 @@ pub fn lpca1(x: &[f64], p: usize, r: &mut [f64], rc: &mut [f64], a: &mut [f64])
for i in 1..=k {
sum -= a[k - i] * r[i];
}

let akk = sum / pe;
rc[k] = akk;

rc[k] = akk;
a[k] = akk;
for i in 1..=k >> 1 {

let k2 = k >> 1;

for i in 1..=k2 {
let ai = a[i];
let aj = a[k - i];
a[i] = ai + akk * aj;
Expand All @@ -69,14 +74,13 @@ pub fn lpca1(x: &[f64], p: usize, r: &mut [f64], rc: &mut [f64], a: &mut [f64])
(0, pe)
}

// like lpca1 but with some use of iterators,
// which does seem to improve performance (per cargo bench)
// but making the code not always as readable.
/// Like lpca1 but with use of iterators; similar performance.
#[allow(dead_code)]
#[inline]
pub fn lpca2(x: &[f64], p: usize, r: &mut [f64], rc: &mut [f64], a: &mut [f64]) -> (i32, f64) {
let n = x.len();

// this is the expensive part:
for (i, r_i) in r.iter_mut().enumerate() {
*r_i = x[0..n - i]
.iter()
Expand All @@ -94,10 +98,6 @@ pub fn lpca2(x: &[f64], p: usize, r: &mut [f64], rc: &mut [f64], a: &mut [f64])
pe = r0;
a[0] = 1.0f64;
for k in 1..=p {
// let mut sum = 0.0f64;
// for i in 1..=k {
// sum -= a[k - i] * r[i];
// }
let sum = -a[0..k]
.iter()
.rev()
Expand All @@ -109,11 +109,22 @@ pub fn lpca2(x: &[f64], p: usize, r: &mut [f64], rc: &mut [f64], a: &mut [f64])

rc[k] = akk;
a[k] = akk;
for i in 1..=k >> 1 {
let ai = a[i];
let aj = a[k - i];
a[i] = ai + akk * aj;
a[k - i] = aj + akk * ai;

let k2 = k >> 1;

// note: when k is even, we handle the "middle" element after this:
let (a_left, a_right) = a[1..k].split_at_mut(k2);
a_left
.iter_mut()
.zip(a_right.iter_mut().rev())
.for_each(|(ai, aj)| {
let tmp = *ai;
*ai += akk * *aj;
*aj += akk * tmp;
});
if k & 1 == 0 {
// handle pending "overlapping" element in the middle:
a[k2] += akk * a[k2];
}

pe *= 1.0f64 - akk * akk;
Expand Down Expand Up @@ -141,95 +152,34 @@ mod tests {
prediction_order
);

let mut vector = vec![0f64; prediction_order + 1];
let mut reflex = vec![0f64; prediction_order + 1];
let mut vector1 = vec![0f64; prediction_order + 1];
let mut reflex1 = vec![0f64; prediction_order + 1];
let mut pred1 = vec![0f64; prediction_order + 1];

lpca1(
&input.x[..],
prediction_order,
&mut vector,
&mut reflex,
&mut vector1,
&mut reflex1,
&mut pred1,
);

let mut vector2 = vec![0f64; prediction_order + 1];
let mut reflex2 = vec![0f64; prediction_order + 1];
let mut pred2 = vec![0f64; prediction_order + 1];
lpca2(
&input.x[..],
prediction_order,
&mut vector,
&mut reflex,
&mut vector2,
&mut reflex2,
&mut pred2,
);

assert_eq!(vector1, vector2);
assert_eq!(reflex1, reflex2);
assert_eq!(pred1, pred2);
}
}

/// Another equivalent version that uses unchecked (unsafe) indexing, kept only
/// for reference; it still shows no actual gain in performance.
///
//#[inline]
//fn lpca_unsafe(x: &[f64], p: usize, r: &mut [f64], rc: &mut [f64], a: &mut [f64]) -> (i32, f64) {
// let n = x.len();
//
// let mut pe: f64 = 0.;
//
// unsafe {
// for i in 0..=p {
// let mut sum = 0.0f64;
// for k in 0..n - i {
// //sum += x[k] * x[k + i];
// let xk = *x.get_unchecked(k);
// let xki = *x.get_unchecked(k + i);
// sum += xk * xki;
// }
// *r.get_unchecked_mut(i) = sum;
// //r[i] = sum;
// }
// let r0 = *r.get_unchecked(0);
// //let r0 = r[0];
// if 0.0f64 == r0 {
// return (1, pe);
// }
//
// pe = r0;
// *a.get_unchecked_mut(0) = 1.0f64;
// //a[0] = 1.0f64;
// for k in 1..=p {
// let mut sum = 0.0f64;
// for i in 1..=k {
// //sum -= a[k - i] * r[i];
// let aki = *a.get_unchecked(k - i);
// let ri = *r.get_unchecked(i);
// sum -= aki * ri;
// }
// let akk = sum / pe;
// *rc.get_unchecked_mut(k) = akk;
// //rc[k] = akk;
//
// a[k] = akk;
// for i in 1..=k >> 1 {
// //let ai = a[i];
// //let aj = a[k - i];
// //a[i] = ai + akk * aj;
// //a[k - i] = aj + akk * ai;
// let ai = *a.get_unchecked(i);
// let aj = *a.get_unchecked(k - i);
// *a.get_unchecked_mut(i) = ai + akk * aj;
// *a.get_unchecked_mut(k - i) = aj + akk * ai;
// }
//
// pe *= 1.0f64 - akk * akk;
// if pe <= 0.0f64 {
// return (2, pe);
// }
// }
// }
//
// (0, pe)
//}

#[derive(serde::Serialize, serde::Deserialize, Debug)]
pub struct LpcaInput {
pub x: Vec<f64>,
Expand Down
11 changes: 5 additions & 6 deletions src/prd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,10 @@ pub fn prd_show(opts: PrdShowOpts) -> Result<(), Box<dyn Error>> {
from,
to,
pickle,
);
)
} else {
prd_show_file(file, show_reflections, from, to);
prd_show_file(file, show_reflections, from, to)
}

Ok(())
}

// NOTE: for Rust implementation (preliminary)
Expand All @@ -112,9 +110,9 @@ fn prd_show_rs(
from: usize,
to: usize,
pickle: Option<PathBuf>,
) {
) -> Result<(), Box<dyn Error>> {
let filename = prd_filename.to_str().unwrap();
let mut prd = load(filename).unwrap();
let mut prd = load(filename)?;
println!("# {}", filename);
prd.show(
show_predictors,
Expand All @@ -124,6 +122,7 @@ fn prd_show_rs(
to,
pickle,
);
Ok(())
}

#[derive(serde::Serialize, serde::Deserialize, Debug)]
Expand Down
Loading

0 comments on commit f308d04

Please sign in to comment.