Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Let user specify iterations without improvement #21

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/deflate.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use alloc::vec::Vec;
use core::{cmp, iter};
use core::{cmp, iter, num::NonZeroU64};

use log::{debug, log_enabled};

Expand Down Expand Up @@ -1186,7 +1186,14 @@ fn blocksplit_attempt<W: Write>(
for &item in &splitpoints_uncompressed {
let mut s = ZopfliBlockState::new(options, last, item);

let store = lz77_optimal(&mut s, in_data, last, item, options.iteration_count.get());
let store = lz77_optimal(
&mut s,
in_data,
last,
item,
options.iteration_count.map(NonZeroU64::get),
options.iterations_without_improvement.map(NonZeroU64::get),
);
totalcost += calculate_block_size_auto_type(&store, 0, store.size());

// ZopfliAppendLZ77Store(&store, &lz77);
Expand All @@ -1202,7 +1209,14 @@ fn blocksplit_attempt<W: Write>(

let mut s = ZopfliBlockState::new(options, last, inend);

let store = lz77_optimal(&mut s, in_data, last, inend, options.iteration_count.get());
let store = lz77_optimal(
&mut s,
in_data,
last,
inend,
options.iteration_count.map(NonZeroU64::get),
options.iterations_without_improvement.map(NonZeroU64::get),
);
totalcost += calculate_block_size_auto_type(&store, 0, store.size());

// ZopfliAppendLZ77Store(&store, &lz77);
Expand Down
12 changes: 8 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ mod util;
#[cfg(feature = "zlib")]
mod zlib;

use core::num::NonZeroU8;
use core::num::NonZeroU64;
#[cfg(all(not(doc), feature = "std"))]
use std::io::{Error, Write};

Expand All @@ -80,10 +80,13 @@ pub struct Options {
#[cfg_attr(
all(test, feature = "std"),
proptest(
strategy = "(1..=10u8).prop_map(|iteration_count| NonZeroU8::new(iteration_count).unwrap())"
strategy = "(1..=10u64).prop_map(|iteration_count| NonZeroU64::new(iteration_count))"
)
)]
pub iteration_count: NonZeroU8,
pub iteration_count: Option<NonZeroU64>,
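/// Stop after rerunning the forward and backward passes this many times without finding
/// a smaller representation of the block. `None` disables this stopping criterion; at
/// least one of this and `iteration_count` should be set, since the optimization loop
/// appears to have no other exit condition.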
pub iterations_without_improvement: Option<NonZeroU64>,
/// Maximum number of blocks to split into (0 for unlimited, but this can give
/// extreme results that hurt compression on some files).
///
Expand All @@ -94,7 +97,8 @@ pub struct Options {
impl Default for Options {
fn default() -> Options {
Options {
iteration_count: NonZeroU8::new(15).unwrap(),
iteration_count: Some(NonZeroU64::new(15).unwrap()),
iterations_without_improvement: None,
maximum_block_splits: 15,
}
}
Expand Down
32 changes: 24 additions & 8 deletions src/squeeze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,8 @@ pub fn lz77_optimal<C: Cache>(
in_data: &[u8],
instart: usize,
inend: usize,
numiterations: u8,
max_iterations: Option<u64>,
max_iterations_without_improvement: Option<u64>,
) -> Lz77Store {
/* Dist to get to here with smallest cost. */
let mut currentstore = Lz77Store::new();
Expand All @@ -470,13 +471,15 @@ pub fn lz77_optimal<C: Cache>(
let mut lastcost = 0.0;
/* Try randomizing the costs a bit once the size stabilizes. */
let mut ran_state = RanState::new();
let mut lastrandomstep = -1;
let mut lastrandomstep = u64::MAX;

/* Do regular deflate, then loop multiple shortest path runs, each time using
the statistics of the previous run. */
/* Repeat the statistics pass, each time using the cost model from the
previous run. */
for i in 0..numiterations as i32 {
let mut current_iteration: u64 = 0;
let mut iterations_without_improvement: u64 = 0;
loop {
currentstore.reset();
lz77_optimal_run(
s,
Expand All @@ -491,30 +494,43 @@ pub fn lz77_optimal<C: Cache>(
let cost = calculate_block_size(&currentstore, 0, currentstore.size(), BlockType::Dynamic);

if cost < bestcost {
iterations_without_improvement = 0;
/* Copy to the output store. */
outputstore = currentstore.clone();
beststats = stats;
bestcost = cost;

debug!("Iteration {}: {} bit", i, cost);
debug!("Iteration {}: {} bit", current_iteration, cost);
} else {
trace!("Iteration {}: {} bit", i, cost);
iterations_without_improvement += 1;
trace!("Iteration {}: {} bit", current_iteration, cost);
if let Some(max_iterations_without_improvement) = max_iterations_without_improvement {
if iterations_without_improvement >= max_iterations_without_improvement {
break;
}
}
}
current_iteration += 1;
if let Some(max_iterations) = max_iterations {
if current_iteration >= max_iterations {
break;
}
}
let laststats = stats;
stats.clear_freqs();
stats.get_statistics(&currentstore);
if lastrandomstep != -1 {
if lastrandomstep != u64::MAX {
/* This makes it converge slower but better. Do it only once the
randomness kicks in so that if the user does few iterations, it gives a
better result sooner. */
stats = add_weighed_stat_freqs(&stats, 1.0, &laststats, 0.5);
stats.calculate_entropy();
}
if i > 5 && (cost - lastcost).abs() < f64::EPSILON {
if current_iteration > 5 && (cost - lastcost).abs() < f64::EPSILON {
stats = beststats;
stats.randomize_stat_freqs(&mut ran_state);
stats.calculate_entropy();
lastrandomstep = i;
lastrandomstep = current_iteration;
}
lastcost = cost;
}
Expand Down