Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework iteration to avoid overflow #68

Merged
merged 8 commits into from
Oct 24, 2017
8 changes: 4 additions & 4 deletions examples/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,24 +121,24 @@ fn quantiles<R: BufRead, W: Write>(mut reader: R, mut writer: W, quantile_precis
let mut sum = 0;
for v in hist.iter_quantiles(ticks_per_half) {
sum += v.count_since_last_iteration();
if v.quantile() < 1.0 {
if v.quantile_iterated_to() < 1.0 {
writer.write_all(
format!(
"{:12} {:1.*} {:1.*} {:10} {:14.2}\n",
v.value(),
v.value_iterated_to(),
quantile_precision,
v.quantile(),
quantile_precision,
v.quantile_iterated_to(),
sum,
1_f64 / (1_f64 - v.quantile())
1_f64 / (1_f64 - v.quantile_iterated_to())
).as_ref(),
)?;
} else {
writer.write_all(
format!(
"{:12} {:1.*} {:1.*} {:10} {:>14}\n",
v.value(),
v.value_iterated_to(),
quantile_precision,
v.quantile(),
quantile_precision,
Expand Down
18 changes: 8 additions & 10 deletions src/iterators/all.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,31 @@
use core::counter::Counter;
use Histogram;
use iterators::{HistogramIterator, PickyIterator};
use iterators::{HistogramIterator, PickMetadata, PickyIterator};

/// An iterator that will yield every bin.
pub struct Iter { visited: Option<usize> }
pub struct Iter {
visited: Option<usize>,
}

impl Iter {
/// Construct a new full iterator. See `Histogram::iter_all` for details.
pub fn new<'a, T: Counter>(hist: &'a Histogram<T>) -> HistogramIterator<'a, T, Iter> {
HistogramIterator::new(hist, Iter{ visited: None })
HistogramIterator::new(hist, Iter { visited: None })
}
}

impl<T: Counter> PickyIterator<T> for Iter {
fn pick(&mut self, index: usize, _: u64) -> bool {
fn pick(&mut self, index: usize, _: u64, _: T) -> Option<PickMetadata> {
if self.visited.map(|i| i != index).unwrap_or(true) {
// haven't visited this index yet
self.visited = Some(index);
true
Some(PickMetadata::new(None, None))
} else {
false
None
}
}

fn more(&mut self, _: usize) -> bool {
true
}

fn quantile_iterated_to(&self) -> Option<f64> {
None
}
}
61 changes: 35 additions & 26 deletions src/iterators/linear.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use core::counter::Counter;
use Histogram;
use iterators::{HistogramIterator, PickyIterator};
use iterators::{HistogramIterator, PickMetadata, PickyIterator};

/// An iterator that will yield at fixed-size steps through the histogram's value range.
pub struct Iter<'a, T: 'a + Counter> {
Expand All @@ -14,45 +14,54 @@ pub struct Iter<'a, T: 'a + Counter> {

impl<'a, T: 'a + Counter> Iter<'a, T> {
/// Construct a new linear iterator. See `Histogram::iter_linear` for details.
pub fn new(hist: &'a Histogram<T>,
value_units_per_bucket: u64)
-> HistogramIterator<'a, T, Iter<'a, T>> {
assert!(value_units_per_bucket > 0, "value_units_per_bucket must be > 0");
HistogramIterator::new(hist,
Iter {
hist,
value_units_per_bucket,
// won't underflow because value_units_per_bucket > 0
current_step_highest_value_reporting_level: value_units_per_bucket - 1,
current_step_lowest_value_reporting_level:
hist.lowest_equivalent(value_units_per_bucket - 1),
})
pub fn new(
hist: &'a Histogram<T>,
value_units_per_bucket: u64,
) -> HistogramIterator<'a, T, Iter<'a, T>> {
assert!(
value_units_per_bucket > 0,
"value_units_per_bucket must be > 0"
);
HistogramIterator::new(
hist,
Iter {
hist,
value_units_per_bucket,
// won't underflow because value_units_per_bucket > 0
current_step_highest_value_reporting_level: value_units_per_bucket - 1,
current_step_lowest_value_reporting_level: hist.lowest_equivalent(
value_units_per_bucket - 1,
),
},
)
}
}

impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
fn pick(&mut self, index: usize, _: u64) -> bool {
fn pick(&mut self, index: usize, _: u64, _: T) -> Option<PickMetadata> {
let val = self.hist.value_for(index);
if val >= self.current_step_lowest_value_reporting_level || index == self.hist.last_index() {
if val >= self.current_step_lowest_value_reporting_level || index == self.hist.last_index()
{
let metadata =
PickMetadata::new(None, Some(self.current_step_highest_value_reporting_level));
self.current_step_highest_value_reporting_level += self.value_units_per_bucket;
self.current_step_lowest_value_reporting_level = self.hist
.lowest_equivalent(self.current_step_highest_value_reporting_level);
true
Some(metadata)
} else {
false
None
}
}

fn more(&mut self, index: usize) -> bool {
fn more(&mut self, index_to_pick: usize) -> bool {
// If the next iterate will not move to the next sub bucket index (which is empty if
// if we reached this point), then we are not yet done iterating (we want to iterate
// until we are no longer on a value that has a count, rather than util we first reach
// until we are no longer on a value that has a count, rather than until we first reach
// the last value that has a count. The difference is subtle but important)...
// TODO index + 1 could overflow 16-bit usize
self.current_step_highest_value_reporting_level + 1 < self.hist.value_for(index + 1)
}

fn quantile_iterated_to(&self) -> Option<f64> {
None
// When this is called, we're about to begin the "next" iteration, so
// current_step_highest_value_reporting_level has already been incremented,
// and we use it without incrementing its value.
let next_index = index_to_pick.checked_add(1).expect("usize overflow");
self.current_step_highest_value_reporting_level < self.hist.value_for(next_index)
}
}
63 changes: 35 additions & 28 deletions src/iterators/log.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use core::counter::Counter;
use Histogram;
use iterators::{HistogramIterator, PickyIterator};
use iterators::{HistogramIterator, PickMetadata, PickyIterator};

/// An iterator that will yield at log-size steps through the histogram's value range.
pub struct Iter<'a, T: 'a + Counter> {
Expand All @@ -17,51 +17,58 @@ pub struct Iter<'a, T: 'a + Counter> {

impl<'a, T: 'a + Counter> Iter<'a, T> {
/// Construct a new logarithmic iterator. See `Histogram::iter_log` for details.
pub fn new(hist: &'a Histogram<T>,
value_units_in_first_bucket: u64,
log_base: f64)
-> HistogramIterator<'a, T, Iter<'a, T>> {
assert!(value_units_in_first_bucket > 0, "value_units_per_bucket must be > 0");
pub fn new(
hist: &'a Histogram<T>,
value_units_in_first_bucket: u64,
log_base: f64,
) -> HistogramIterator<'a, T, Iter<'a, T>> {
assert!(
value_units_in_first_bucket > 0,
"value_units_per_bucket must be > 0"
);
assert!(log_base > 1.0, "log_base must be > 1.0");
HistogramIterator::new(hist,
Iter {
hist,
log_base,
next_value_reporting_level: value_units_in_first_bucket as f64,
current_step_highest_value_reporting_level: value_units_in_first_bucket -
1,
current_step_lowest_value_reporting_level:
hist.lowest_equivalent(value_units_in_first_bucket - 1),
})
HistogramIterator::new(
hist,
Iter {
hist,
log_base,
next_value_reporting_level: value_units_in_first_bucket as f64,
current_step_highest_value_reporting_level: value_units_in_first_bucket - 1,
current_step_lowest_value_reporting_level: hist.lowest_equivalent(
value_units_in_first_bucket - 1,
),
},
)
}
}

impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
fn pick(&mut self, index: usize, _: u64) -> bool {
fn pick(&mut self, index: usize, _: u64, _: T) -> Option<PickMetadata> {
let val = self.hist.value_for(index);
if val >= self.current_step_lowest_value_reporting_level || index == self.hist.last_index() {
if val >= self.current_step_lowest_value_reporting_level || index == self.hist.last_index()
{
let metadata =
PickMetadata::new(None, Some(self.current_step_highest_value_reporting_level));
// implies log_base must be > 1.0
self.next_value_reporting_level *= self.log_base;
// won't underflow since next_value_reporting_level starts > 0 and only grows
self.current_step_highest_value_reporting_level = self.next_value_reporting_level as u64 - 1;
self.current_step_highest_value_reporting_level =
self.next_value_reporting_level as u64 - 1;
self.current_step_lowest_value_reporting_level = self.hist
.lowest_equivalent(self.current_step_highest_value_reporting_level);
true
Some(metadata)
} else {
false
None
}
}

fn more(&mut self, next_index: usize) -> bool {
fn more(&mut self, index_to_pick: usize) -> bool {
// If the next iterate will not move to the next sub bucket index (which is empty if if we
// reached this point), then we are not yet done iterating (we want to iterate until we are
// no longer on a value that has a count, rather than util we first reach the last value
// that has a count. The difference is subtle but important)...
self.hist.lowest_equivalent(self.next_value_reporting_level as u64) <
self.hist.value_for(next_index)
}

fn quantile_iterated_to(&self) -> Option<f64> {
None
self.hist
.lowest_equivalent(self.next_value_reporting_level as u64)
< self.hist.value_for(index_to_pick)
}
}
Loading