HdrHistogram · marshallpierce · Oct 24, 2017 · Apr 11, 2017 · Oct 14, 2017 · Oct 19, 2017
diff --git a/examples/cli.rs b/examples/cli.rs
@@ -121,24 +121,24 @@ fn quantiles<R: BufRead, W: Write>(mut reader: R, mut writer: W, quantile_precis
     let mut sum = 0;
     for v in hist.iter_quantiles(ticks_per_half) {
         sum += v.count_since_last_iteration();
-        if v.quantile() < 1.0 {
+        if v.quantile_iterated_to() < 1.0 {
             writer.write_all(
                 format!(
                     "{:12} {:1.*} {:1.*} {:10} {:14.2}\n",
-                    v.value(),
+                    v.value_iterated_to(),
                     quantile_precision,
                     v.quantile(),
                     quantile_precision,
                     v.quantile_iterated_to(),
                     sum,
-                    1_f64 / (1_f64 - v.quantile())
+                    1_f64 / (1_f64 - v.quantile_iterated_to())
                 ).as_ref(),
             )?;
         } else {
             writer.write_all(
                 format!(
                     "{:12} {:1.*} {:1.*} {:10} {:>14}\n",
-                    v.value(),
+                    v.value_iterated_to(),
                     quantile_precision,
                     v.quantile(),
                     quantile_precision,

diff --git a/src/iterators/all.rs b/src/iterators/all.rs
@@ -1,6 +1,6 @@
 use Counter;
 use Histogram;
-use iterators::{HistogramIterator, PickyIterator};
+use iterators::{HistogramIterator, PickyIterator, PickMetadata};
 
 /// An iterator that will yield every bin.
 pub struct Iter { visited: Option<usize> }
@@ -13,21 +13,17 @@ impl Iter {
 }
 
 impl<T: Counter> PickyIterator<T> for Iter {
-    fn pick(&mut self, index: usize, _: u64) -> bool {
+    fn pick(&mut self, index: usize, _: u64, _: T) -> Option<PickMetadata> {
         if self.visited.map(|i| i != index).unwrap_or(true) {
             // haven't visited this index yet
             self.visited = Some(index);
-            true
+            Some(PickMetadata::new(None, None))
         } else {
-            false
+            None
         }
     }
 
     fn more(&mut self, _: usize) -> bool {
         true
     }
-
-    fn quantile_iterated_to(&self) -> Option<f64> {
-        None
-    }
 }
diff --git a/src/iterators/linear.rs b/src/iterators/linear.rs
@@ -1,6 +1,6 @@
 use Counter;
 use Histogram;
-use iterators::{HistogramIterator, PickyIterator};
+use iterators::{HistogramIterator, PickyIterator, PickMetadata};
 
 /// An iterator that will yield at fixed-size steps through the histogram's value range.
 pub struct Iter<'a, T: 'a + Counter> {
@@ -31,28 +31,28 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
 }
 
 impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
-    fn pick(&mut self, index: usize, _: u64) -> bool {
+    fn pick(&mut self, index: usize, _: u64, _: T) -> Option<PickMetadata> {
         let val = self.hist.value_for(index);
         if val >= self.current_step_lowest_value_reporting_level || index == self.hist.last_index() {
+            let metadata = PickMetadata::new(None, Some(self.current_step_highest_value_reporting_level));
             self.current_step_highest_value_reporting_level += self.value_units_per_bucket;
             self.current_step_lowest_value_reporting_level = self.hist
-                .lowest_equivalent(self.current_step_highest_value_reporting_level);
-            true
+                    .lowest_equivalent(self.current_step_highest_value_reporting_level);
+            Some(metadata)
         } else {
-            false
+            None
         }
     }
 
-    fn more(&mut self, index: usize) -> bool {
+    fn more(&mut self, index_to_pick: usize) -> bool {
         // If the next iterate will not move to the next sub bucket index (which is empty if
         // if we reached this point), then we are not yet done iterating (we want to iterate
-        // until we are no longer on a value that has a count, rather than util we first reach
+        // until we are no longer on a value that has a count, rather than until we first reach
         // the last value that has a count. The difference is subtle but important)...
-        // TODO index + 1 could overflow 16-bit usize
-        self.current_step_highest_value_reporting_level + 1 < self.hist.value_for(index + 1)
-    }
-
-    fn quantile_iterated_to(&self) -> Option<f64> {
-        None
+        // When this is called, we're about to begin the "next" iteration, so
+        // current_step_highest_value_reporting_level has already been incremented,
+        // and we use it without incrementing its value.
+        let next_index = index_to_pick.checked_add(1).expect("usize overflow");
+        self.current_step_highest_value_reporting_level < self.hist.value_for(next_index)
     }
 }
diff --git a/src/iterators/log.rs b/src/iterators/log.rs
@@ -1,6 +1,6 @@
 use Counter;
 use Histogram;
-use iterators::{HistogramIterator, PickyIterator};
+use iterators::{HistogramIterator, PickyIterator, PickMetadata};
 
 /// An iterator that will yield at log-size steps through the histogram's value range.
 pub struct Iter<'a, T: 'a + Counter> {
@@ -37,31 +37,28 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
 }
 
 impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
-    fn pick(&mut self, index: usize, _: u64) -> bool {
+    fn pick(&mut self, index: usize, _: u64, _: T) -> Option<PickMetadata> {
         let val = self.hist.value_for(index);
         if val >= self.current_step_lowest_value_reporting_level || index == self.hist.last_index() {
+            let metadata = PickMetadata::new(None, Some(self.current_step_highest_value_reporting_level));
             // implies log_base must be > 1.0
             self.next_value_reporting_level *= self.log_base;
             // won't underflow since next_value_reporting_level starts > 0 and only grows
             self.current_step_highest_value_reporting_level = self.next_value_reporting_level as u64 - 1;
             self.current_step_lowest_value_reporting_level = self.hist
-                .lowest_equivalent(self.current_step_highest_value_reporting_level);
-            true
+                    .lowest_equivalent(self.current_step_highest_value_reporting_level);
+            Some(metadata)
         } else {
-            false
+            None
         }
     }
 
-    fn more(&mut self, next_index: usize) -> bool {
+    fn more(&mut self, index_to_pick: usize) -> bool {
         // If the next iterate will not move to the next sub bucket index (which is empty if if we
         // reached this point), then we are not yet done iterating (we want to iterate until we are
         // no longer on a value that has a count, rather than util we first reach the last value
         // that has a count. The difference is subtle but important)...
         self.hist.lowest_equivalent(self.next_value_reporting_level as u64) <
-            self.hist.value_for(next_index)
-    }
-
-    fn quantile_iterated_to(&self) -> Option<f64> {
-        None
+            self.hist.value_for(index_to_pick)
     }
 }
diff --git a/src/iterators/mod.rs b/src/iterators/mod.rs
@@ -16,18 +16,44 @@ pub mod recorded;
 /// An iterator that iterates over histogram values.
 pub mod all;
 
+/// Extra information about the picked point in the histogram provided by the picker.
+pub struct PickMetadata {
+    /// Supply the quantile iterated to in the last `pick()`, if available. If `None` is provided,
+    /// the quantile of the current value will be used instead. Probably only useful for the
+    /// quantile iterator.
+    quantile_iterated_to: Option<f64>,
+
+    /// Supply the value iterated to in the last `pick()`, if the picker can supply a more useful
+    /// value than the largest value represented by the bucket.
+    value_iterated_to: Option<u64>
+}
+
+impl PickMetadata {
+    fn new(quantile_iterated_to: Option<f64>, value_iterated_to: Option<u64>) -> PickMetadata {
+        PickMetadata {
+            quantile_iterated_to,
+            value_iterated_to
+        }
+    }
+}
+
 /// A trait for designing an subset iterator over values in a `Histogram`.
 pub trait PickyIterator<T: Counter> {
-    /// should an item be yielded for the given index?
+    /// Return `Some` if an `IterationValue` should be emitted at this point.
+    ///
     /// `index` is a valid index in the relevant histogram.
-    fn pick(&mut self, index: usize, total_count_to_index: u64) -> bool;
-    /// should we keep iterating even though all future indices are zeros?
-    fn more(&mut self, index: usize) -> bool;
-
-    /// Supply the quantile iterated to in the last `pick()`, if available. If `None` is returned,
-    /// the quantile of the current value will be used instead. Probably only useful for the
-    /// quantile iterator.
-    fn quantile_iterated_to(&self) -> Option<f64>;
+    ///
+    /// This will be called with the same index until it returns `None`. This enables modes of
+    /// iteration that pick different values represented by the same bucket, for instance.
+    fn pick(&mut self, index: usize, total_count_to_index: u64, count_at_index: T) -> Option<PickMetadata>;
+
+    /// Should we keep iterating even though the last index with non-zero count has already been
+    /// picked at least once?
+    ///
+    /// This will be called on every iteration once the last index with non-zero count has been
+    /// picked, even if the index was not advanced in the last iteration (because `pick()` returned
+    /// `Some`).
+    fn more(&mut self, index_to_pick: usize) -> bool;
 }
 
 /// `HistogramIterator` provides a base iterator for a `Histogram`.
@@ -37,17 +63,14 @@ pub trait PickyIterator<T: Counter> {
 /// sophisticated iterators, a *picker* is also provided, which is allowed to only select some bins
 /// that should be yielded. The picker may also extend the iteration to include a suffix of empty
 /// bins.
-///
-/// One peculiarity of this iterator is that, if the picker does choose to yield a particular bin,
-/// that bin *is re-visited* before moving on to later bins. It is not clear why this is, but it is
-/// how the iterators were implemented in the original HdrHistogram, so we preserve the behavior
-/// here. This is the reason why iterators such as all and recorded need to keep track of which
-/// indices they have already visited.
 pub struct HistogramIterator<'a, T: 'a + Counter, P: PickyIterator<T>> {
     hist: &'a Histogram<T>,
     total_count_to_index: u64,
-    prev_total_count: u64,
+    count_since_last_iteration: u64,
+    count_at_index: T,
     current_index: usize,
+    last_picked_index: usize,
+    max_value_index: usize,
     fresh: bool,
     ended: bool,
     picker: P,
@@ -56,7 +79,7 @@ pub struct HistogramIterator<'a, T: 'a + Counter, P: PickyIterator<T>> {
 /// The value emitted at each step when iterating over a `Histogram`.
 #[derive(Debug, PartialEq)]
 pub struct IterationValue<T: Counter> {
-    value: u64,
+    value_iterated_to: u64,
     quantile: f64,
     quantile_iterated_to: f64,
     count_at_value: T,
@@ -65,34 +88,35 @@ pub struct IterationValue<T: Counter> {
 
 impl<T: Counter> IterationValue<T> {
     /// Create a new IterationValue.
-    pub fn new(value: u64, quantile: f64, quantile_iterated_to: f64, count_at_value: T,
+    pub fn new(value_iterated_to: u64, quantile: f64, quantile_iterated_to: f64, count_at_value: T,
                count_since_last_iteration: u64) -> IterationValue<T> {
         IterationValue {
-            value,
+            value_iterated_to,
             quantile,
             quantile_iterated_to,
             count_at_value,
             count_since_last_iteration
         }
     }
 
-    /// The lowest value stored in the current histogram bin
-    pub fn value(&self) -> u64 {
-        self.value
+    /// The value iterated to. Some iterators provide a specific value inside the bucket, while
+    /// others just use the highest value in the bucket.
+    pub fn value_iterated_to(&self) -> u64 {
+        self.value_iterated_to
     }
 
-    /// Percent of recorded values that are equivalent to or below `value`.
+    /// Percent of recorded values that are at or below the current bucket.
     /// This is simply the quantile multiplied by 100.0, so if you care about maintaining the best
     /// floating-point precision, use `quantile()` instead.
     pub fn percentile(&self) -> f64 {
         self.quantile * 100.0
     }
 
-    /// Quantile of recorded values that are equivalent to or below `value`
+    /// Quantile of recorded values that are at or below the current bucket.
     pub fn quantile(&self) -> f64 { self.quantile }
 
-    /// Quantile iterated to, which in the case of quantile iteration may be different from
-    /// `quantile` because slightly different quantiles can still map to the same bucket.
+    /// Quantile iterated to, which may be different than `quantile()` when an iterator provides
+    /// information about the specific quantile it's iterating to.
     pub fn quantile_iterated_to(&self) -> f64 { self.quantile_iterated_to }
 
     /// Recorded count for values equivalent to `value`
@@ -111,25 +135,16 @@ impl<'a, T: Counter, P: PickyIterator<T>> HistogramIterator<'a, T, P> {
         HistogramIterator {
             hist: h,
             total_count_to_index: 0,
-            prev_total_count: 0,
+            count_since_last_iteration: 0,
+            count_at_index: T::zero(),
             current_index: 0,
+            last_picked_index: 0,
+            max_value_index: h.index_for(h.max()).expect("Either 0 or an existing index"),
             picker,
             fresh: true,
             ended: false,
         }
     }
-
-    fn current(&self) -> IterationValue<T> {
-        let quantile = self.total_count_to_index as f64 / self.hist.count() as f64;
-        IterationValue {
-            value: self.hist.highest_equivalent(self.hist.value_for(self.current_index)),
-            quantile,
-            quantile_iterated_to: self.picker.quantile_iterated_to().unwrap_or(quantile),
-            count_at_value: self.hist.count_at_index(self.current_index)
-                .expect("current index cannot exceed counts length"),
-            count_since_last_iteration: self.total_count_to_index - self.prev_total_count
-        }
-    }
 }
 
 impl<'a, T: 'a, P> Iterator for HistogramIterator<'a, T, P>
@@ -155,50 +170,53 @@ impl<'a, T: 'a, P> Iterator for HistogramIterator<'a, T, P>
                 return None;
             }
 
-            // TODO should check if we've reached max, not count, to avoid early termination
-            // on histograms with very large counts whose total would exceed u64::max_value()
-            // Have we yielded all non-zeros in the histogram?
-            let total = self.hist.count();
-            if self.prev_total_count == total {
+            // Have we already picked the index with the last non-zero count in the histogram?
+            if self.last_picked_index >= self.max_value_index {
                 // is the picker done?
                 if !self.picker.more(self.current_index) {
                     self.ended = true;
                     return None;
                 }
-
-            // nope -- alright, let's keep iterating
             } else {
+                // nope -- alright, let's keep iterating
                 assert!(self.current_index < self.hist.len());
-                assert!(self.prev_total_count < total);
 
                 if self.fresh {
-                    let count = self.hist.count_at_index(self.current_index)
-                        .expect("Already checked that current_index is < counts len");
-
-                    // if we've seen all counts, no other counts should be non-zero
-                    if self.total_count_to_index == total {
-                        // TODO this can fail when total count overflows
-                        assert_eq!(count, T::zero());
-                    }
+                    // at a new index, and not past the max, so there's nonzero counts to add
+                    self.count_at_index = self.hist.count_at_index(self.current_index)
+                            .expect("Already checked that current_index is < counts len");
 
-                    // TODO overflow
-                    self.total_count_to_index = self.total_count_to_index + count.as_u64();
+                    self.total_count_to_index =
+                            self.total_count_to_index.saturating_add(self.count_at_index.as_u64());
+                    self.count_since_last_iteration =
+                            self.count_since_last_iteration.saturating_add(self.count_at_index.as_u64());
 
                     // make sure we don't add this index again
                     self.fresh = false;
                 }
             }
 
             // figure out if picker thinks we should yield this value
-            if self.picker.pick(self.current_index, self.total_count_to_index) {
-                let val = self.current();
-
-                // note that we *don't* increment self.current_index here. the picker will be
-                // exposed to the same value again after yielding. not sure why this is the
-                // behavior we want, but it's what the original Java implementation dictates.
-
-                // TODO count starting at 0 each time we emit a value to be less prone to overflow
-                self.prev_total_count = self.total_count_to_index;
+            if let Some(metadata) = self.picker.pick(self.current_index, self.total_count_to_index, self.count_at_index) {
+                let quantile = self.total_count_to_index as f64 / self.hist.count() as f64;
+                let val = IterationValue {
+                    value_iterated_to: metadata.value_iterated_to
+                            .unwrap_or(self.hist.highest_equivalent(self.hist.value_for(self.current_index))),
+                    quantile,
+                    quantile_iterated_to: metadata.quantile_iterated_to.unwrap_or(quantile),
+                    count_at_value: self.hist.count_at_index(self.current_index)
+                            .expect("current index cannot exceed counts length"),
+                    count_since_last_iteration: self.count_since_last_iteration
+                };
+
+                // Note that we *don't* increment self.current_index here. The picker will be
+                // exposed to the same value again after yielding. This is to allow a picker to
+                // pick multiple times at the same index. An example of this is how the linear
+                // picker may be using a step size smaller than the bucket size, so it should
+                // step multiple times without advancing the index.
+
+                self.count_since_last_iteration = 0;
+                self.last_picked_index = self.current_index;
                 return Some(val);
             }