diff --git a/Cargo.toml b/Cargo.toml
index 37e7e5a7..fcc45e18 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -44,3 +44,9 @@ benchmarks = []
 path = "benches/decoder.rs"
 name = "decoder"
 harness = false
+
+[[bench]]
+path = "benches/unfilter.rs"
+name = "unfilter"
+harness = false
+required-features = ["benchmarks"]
diff --git a/benches/unfilter.rs b/benches/unfilter.rs
new file mode 100644
index 00000000..2f6e1f2f
--- /dev/null
+++ b/benches/unfilter.rs
@@ -0,0 +1,61 @@
+//! Usage example:
+//!
+//! ```
+//! $ alias bench="rustup run nightly cargo bench"
+//! $ bench --bench=unfilter --features=benchmarks -- --save-baseline my_baseline
+//! ... tweak something, say the Sub filter ...
+//! $ bench --bench=unfilter --features=benchmarks -- filter=Sub --baseline my_baseline
+//! ```
+
+use criterion::{criterion_group, criterion_main, Criterion, Throughput};
+use png::benchable_apis::unfilter;
+use png::FilterType;
+use rand::Rng;
+
+/// Benchmarks `unfilter` for every listed combination of filter type and bytes-per-pixel.
+fn unfilter_all(c: &mut Criterion) {
+    let bpps = [1, 2, 3, 4, 6, 8];
+    let filters = [
+        FilterType::Sub,
+        FilterType::Up,
+        FilterType::Avg,
+        FilterType::Paeth,
+    ];
+    for &filter in filters.iter() {
+        for &bpp in bpps.iter() {
+            bench_unfilter(c, filter, bpp);
+        }
+    }
+}
+
+criterion_group!(benches, unfilter_all);
+criterion_main!(benches);
+
+/// Benchmarks unfiltering a single row of 4096 pixels for the given `filter` and `bpp`.
+fn bench_unfilter(c: &mut Criterion, filter: FilterType, bpp: u8) {
+    let mut group = c.benchmark_group("unfilter");
+
+    // Returns `n` bytes filled from `rng`.
+    fn get_random_bytes<R: Rng>(rng: &mut R, n: usize) -> Vec<u8> {
+        use rand::Fill;
+        let mut result = vec![0u8; n];
+        result.as_mut_slice().try_fill(rng).unwrap();
+        result
+    }
+    let mut rng = rand::thread_rng();
+    let row_size = 4096 * (bpp as usize);
+    let two_rows = get_random_bytes(&mut rng, row_size * 2);
+
+    // Throughput is counted per row (`row_size` bytes), not per two-row buffer.
+    group.throughput(Throughput::Bytes(row_size as u64));
+    group.bench_with_input(
+        format!("filter={filter:?}/bpp={bpp}"),
+        &two_rows,
+        |b, two_rows| {
+            let (prev_row, curr_row) = two_rows.split_at(row_size);
+            // `unfilter` takes the current row as `&mut [u8]`, so it needs a mutable copy.
+            let mut curr_row = curr_row.to_vec();
+            b.iter(|| unfilter(filter, bpp, prev_row, curr_row.as_mut_slice()));
+        },
+    );
+}
diff --git a/src/benchable_apis.rs b/src/benchable_apis.rs
new file mode 100644
index 00000000..0be8134f
--- /dev/null
+++ b/src/benchable_apis.rs
@@ -0,0 +1,16 @@
+//! Development-time-only helper module for exporting private APIs so that they can be benchmarked.
+//! This module is gated behind the "benchmarks" feature.
+
+use crate::common::BytesPerPixel;
+use crate::filter::FilterType;
+
+/// Re-exporting `unfilter` to make it easier to benchmark, despite some items being only
+/// `pub(crate)`: `fn unfilter`, `enum BytesPerPixel`.
+///
+/// # Panics
+///
+/// Panics if `tbpp` is not one of 1, 2, 3, 4, 6 or 8.
+pub fn unfilter(filter: FilterType, tbpp: u8, previous: &[u8], current: &mut [u8]) {
+    let tbpp = BytesPerPixel::from_usize(tbpp as usize);
+    crate::filter::unfilter(filter, tbpp, previous, current)
+}
diff --git a/src/common.rs b/src/common.rs
index 6e5dbffe..400aca11 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -594,15 +594,7 @@ impl Info<'_> {
     /// has the consequence that the number of possible values is rather small. To make this fact
     /// more obvious in the type system and the optimizer we use an explicit enum here.
     pub(crate) fn bpp_in_prediction(&self) -> BytesPerPixel {
-        match self.bytes_per_pixel() {
-            1 => BytesPerPixel::One,
-            2 => BytesPerPixel::Two,
-            3 => BytesPerPixel::Three,
-            4 => BytesPerPixel::Four,
-            6 => BytesPerPixel::Six,   // Only rgb×16bit
-            8 => BytesPerPixel::Eight, // Only rgba×16bit
-            _ => unreachable!("Not a possible byte rounded pixel width"),
-        }
+        BytesPerPixel::from_usize(self.bytes_per_pixel())
     }
 
     /// Returns the number of bytes needed for one deinterlaced image.
@@ -695,6 +687,23 @@ impl Info<'_> {
 }
 
 impl BytesPerPixel {
+    /// Converts a byte-rounded pixel width into the matching `BytesPerPixel` variant.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `bpp` is not one of 1, 2, 3, 4, 6 or 8.
+    pub(crate) fn from_usize(bpp: usize) -> Self {
+        match bpp {
+            1 => BytesPerPixel::One,
+            2 => BytesPerPixel::Two,
+            3 => BytesPerPixel::Three,
+            4 => BytesPerPixel::Four,
+            6 => BytesPerPixel::Six,   // Only rgb×16bit
+            8 => BytesPerPixel::Eight, // Only rgba×16bit
+            _ => unreachable!("Not a possible byte rounded pixel width"),
+        }
+    }
+
     pub(crate) fn into_usize(self) -> usize {
         self as usize
     }
diff --git a/src/lib.rs b/src/lib.rs
index b3bb15b1..1bcfdb99 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -79,3 +79,6 @@ pub use crate::decoder::{
 };
 pub use crate::encoder::{Encoder, EncodingError, StreamWriter, Writer};
 pub use crate::filter::{AdaptiveFilterType, FilterType};
+
+#[cfg(feature = "benchmarks")]
+pub mod benchable_apis;