From 6d11209c3e8da90b83b4b5e9269e721bd5672a6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Tue, 6 Aug 2024 14:23:56 +0200 Subject: [PATCH 01/15] Implement SkipTo for iter::Iter --- src/bitmap/container.rs | 16 ++++++++ src/bitmap/iter.rs | 64 ++++++++++++++++++++++++++++- src/bitmap/store/array_store/mod.rs | 43 ++++++++++++++++++- src/bitmap/store/bitmap_store.rs | 61 +++++++++++++++++++++++++++ src/bitmap/store/mod.rs | 14 +++++++ src/lib.rs | 9 ++++ tests/skip_to.rs | 12 ++++++ 7 files changed, 216 insertions(+), 3 deletions(-) create mode 100644 tests/skip_to.rs diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index 3ab1e5ad..9dbb14cf 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -181,6 +181,13 @@ impl Container { } }; } + + pub(crate) fn skip_to_iter(&self, n: u16) -> Iter<'_> { + Iter { + key: self.key, + inner: self.store.skip_to_iter(n), + } + } } impl BitOr<&Container> for &Container { @@ -313,3 +320,12 @@ impl fmt::Debug for Container { format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter) } } + +impl<'a> Iter<'a> { + pub fn empty() -> Self { + Self { + key: 0, + inner: store::Iter::Empty, + } + } +} \ No newline at end of file diff --git a/src/bitmap/iter.rs b/src/bitmap/iter.rs index 59463a21..1cf698e9 100644 --- a/src/bitmap/iter.rs +++ b/src/bitmap/iter.rs @@ -3,13 +3,17 @@ use core::iter; use core::slice; use super::container::Container; -use crate::{NonSortedIntegers, RoaringBitmap}; +use crate::{NonSortedIntegers, RoaringBitmap, SkipTo}; #[cfg(not(feature = "std"))] use alloc::vec::Vec; +use crate::bitmap::container; +use crate::bitmap::util::split; /// An iterator for `RoaringBitmap`. pub struct Iter<'a> { + // needed to support skip_to + containers: &'a [Container], inner: iter::Flatten>, size_hint: u64, } @@ -23,7 +27,7 @@ pub struct IntoIter { impl Iter<'_> { fn new(containers: &[Container]) -> Iter { let size_hint = containers.iter().map(|c| c.len()).sum(); - Iter { inner: containers.iter().flatten(), size_hint } + Iter { containers, inner: containers.iter().flatten(), size_hint } } } @@ -298,3 +302,59 @@ impl RoaringBitmap { Ok(count) } } + +impl<'a> SkipTo for Iter<'a> { + type Iter = SkipToIter<'a>; + + fn skip_to(self, n: u32) -> Self::Iter { + SkipToIter::new(&self.containers, n) + } +} +pub struct SkipToIter<'a> { + container_index: usize, + containers: &'a [Container], + iter: container::Iter<'a> +} + +impl<'a> SkipToIter<'a> { + fn new(containers: &'a [Container], n: u32) -> Self { + let (key, v) = split(n); + match containers.binary_search_by_key(&key, |e| e.key) { + Ok(index) | Err(index) => { + if index == containers.len() { + // there are no containers with a key <= n. Return empty + Self { + container_index: 0, + containers: &[], + iter: container::Iter::empty(), + } + } else { + Self { + container_index: 0, + containers: &containers[index..], + iter: (&containers[index]).skip_to_iter(v), + } + } + } + } + } +} + +impl Iterator for SkipToIter<'_> { + type Item = u32; + + fn next(&mut self) -> Option { + let next = self.iter.next(); + if next.is_some() { + next + } else if self.container_index == self.containers.len() - 1 { + None + } else { + self.container_index += 1; + self.iter = (&self.containers[self.container_index]).into_iter(); + // we can call self.iter.next() directly here if we know that a + // container can't be empty. Do we? + self.next() + } + } +} \ No newline at end of file diff --git a/src/bitmap/store/array_store/mod.rs b/src/bitmap/store/array_store/mod.rs index 883db31f..710a644c 100644 --- a/src/bitmap/store/array_store/mod.rs +++ b/src/bitmap/store/array_store/mod.rs @@ -13,7 +13,6 @@ use alloc::vec::Vec; #[cfg(not(feature = "std"))] use alloc::boxed::Box; - use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; #[derive(Clone, Eq, PartialEq)] @@ -227,6 +226,18 @@ impl ArrayStore { self.vec.iter() } + pub fn skip_to_iter(&self, n: u16) -> core::slice::Iter { + match self.vec.as_slice().binary_search(&n) { + Ok(index) | Err(index) => { + if index == self.vec.len() { + Default::default() + } else { + (&self.vec[index..]).into_iter() + } + } + } + } + pub fn into_iter(self) -> alloc::vec::IntoIter { self.vec.into_iter() } @@ -434,6 +445,36 @@ mod tests { } } + #[test] + fn test_skip_to_iter() { + let mut store = ArrayStore::new(); + store.insert_range(0..=16); + store.insert_range(32..=64); + + // test direct hits + let mut iter = store.skip_to_iter(32); + for n in 32..=64 { + assert_eq!(iter.next(), Some(&n)); + } + + // test index in gap + let mut i = store.skip_to_iter(17); + assert_eq!(i.next(), Some(&32)); + + // test index after end + let mut i = store.skip_to_iter(65); + assert_eq!(i.next(), None); + + // test last value + let mut i = store.skip_to_iter(64); + assert_eq!(i.next(), Some(&64)); + + // test first value + let mut i = store.skip_to_iter(0); + assert_eq!(i.next(), Some(&0)); + + } + #[test] #[allow(clippy::reversed_empty_ranges)] fn test_array_insert_invalid_range() { diff --git a/src/bitmap/store/bitmap_store.rs b/src/bitmap/store/bitmap_store.rs index f349a2aa..fad87287 100644 --- a/src/bitmap/store/bitmap_store.rs +++ b/src/bitmap/store/bitmap_store.rs @@ -304,6 +304,10 @@ impl BitmapStore { BitmapIter::new(&self.bits) } + pub(crate) fn skip_to_iter(&self, n: u16) -> BitmapIter<&[u64; BITMAP_LENGTH]> { + BitmapIter::new_from(&self.bits, n) + } + pub fn into_iter(self) -> BitmapIter> { BitmapIter::new(self.bits) } @@ -421,6 +425,34 @@ impl> BitmapIter { bits, } } + + fn new_from(bits: B, n: u16) -> BitmapIter { + let key = key(n); + let mut value = bits.borrow()[key]; + + // There is probably a way to calculate value much more efficiently + // using bit(n) and some bit magic, but I haven't figured that one out + let mut prev_value; + loop { + if value == 0 { + prev_value = 0; + break; + } + let index = value.trailing_zeros() as usize; + prev_value = value; + value &= value - 1; + if (64 * key + index) as u16 >= n { + break; + } + } + BitmapIter { + key, + value: prev_value, + key_back: BITMAP_LENGTH - 1, + value_back: bits.borrow()[BITMAP_LENGTH - 1], + bits, + } + } } impl> Iterator for BitmapIter { @@ -556,6 +588,35 @@ impl BitXorAssign<&ArrayStore> for BitmapStore { mod tests { use super::*; + #[test] + fn test_skip_to_iter() { + let mut store = BitmapStore::new(); + store.insert_range(0..=16); + store.insert_range(32..=64); + + // test direct hits + let mut iter = store.skip_to_iter(32); + for n in 32..=64 { + assert_eq!(iter.next(), Some(n)); + } + + // test index in gap + let mut i = store.skip_to_iter(17); + assert_eq!(i.next(), Some(32)); + + // test index after end + let mut i = store.skip_to_iter(65); + assert_eq!(i.next(), None); + + // test last value + let mut i = store.skip_to_iter(64); + assert_eq!(i.next(), Some(64)); + + // test first value + let mut i = store.skip_to_iter(0); + assert_eq!(i.next(), Some(0)); + + } #[test] fn test_bitmap_remove_smallest() { let mut store = BitmapStore::new(); diff --git a/src/bitmap/store/mod.rs b/src/bitmap/store/mod.rs index bb0d5822..26f738b9 100644 --- a/src/bitmap/store/mod.rs +++ b/src/bitmap/store/mod.rs @@ -30,6 +30,7 @@ pub enum Iter<'a> { Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), BitmapOwned(BitmapIter>), + Empty } impl Store { @@ -218,6 +219,17 @@ impl Store { Bitmap(_) => self.clone(), } } + + pub(crate) fn skip_to_iter(&self, n: u16) -> Iter<'_> { + match &self { + Array(a) => { + Iter::Array(a.skip_to_iter(n)) + } + Bitmap(bm) => { + Iter::BitmapBorrowed(bm.skip_to_iter(n)) + } + } + } } impl Default for Store { @@ -506,6 +518,7 @@ impl<'a> Iterator for Iter<'a> { Iter::Vec(inner) => inner.next(), Iter::BitmapBorrowed(inner) => inner.next(), Iter::BitmapOwned(inner) => inner.next(), + Iter::Empty => None, } } } @@ -517,6 +530,7 @@ impl DoubleEndedIterator for Iter<'_> { Iter::Vec(inner) => inner.next_back(), Iter::BitmapBorrowed(inner) => inner.next_back(), Iter::BitmapOwned(inner) => inner.next_back(), + Iter::Empty => None, } } } diff --git a/src/lib.rs b/src/lib.rs index b44c19c2..af7bfd4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ extern crate byteorder; #[macro_use] extern crate alloc; +extern crate core; use core::fmt; @@ -93,3 +94,11 @@ pub trait MultiOps: IntoIterator { /// The `symmetric difference` between all elements. fn symmetric_difference(self) -> Self::Output; } + +/// Get an iterator that yields values >= `n` +pub trait SkipTo { + /// The type of iterator that will be returned + type Iter: Iterator; + /// Get an iterator that yields values >= `n` + fn skip_to(self, n: u32) -> Self::Iter; +} diff --git a/tests/skip_to.rs b/tests/skip_to.rs new file mode 100644 index 00000000..20a8a9cc --- /dev/null +++ b/tests/skip_to.rs @@ -0,0 +1,12 @@ +use roaring::{RoaringBitmap, SkipTo}; + +#[test] +fn basic() { + let bm = RoaringBitmap::from([1,2,3,4,11,12,13,14]); + let mut i = bm.iter().skip_to(10); + for n in 11..=14 { + assert_eq!(i.next(), Some(n)) + } + assert_eq!(i.next(), None); + +} \ No newline at end of file From df0b3cd142af5b6b72d0ae33abe4deb63838e5f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Tue, 6 Aug 2024 15:04:28 +0200 Subject: [PATCH 02/15] Fix panic when calling skip_to on iterator from empty bitmap --- src/bitmap/iter.rs | 3 +++ tests/skip_to.rs | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/src/bitmap/iter.rs b/src/bitmap/iter.rs index 1cf698e9..a4415a5e 100644 --- a/src/bitmap/iter.rs +++ b/src/bitmap/iter.rs @@ -344,6 +344,9 @@ impl Iterator for SkipToIter<'_> { type Item = u32; fn next(&mut self) -> Option { + if self.containers.is_empty() { + return None + } let next = self.iter.next(); if next.is_some() { next diff --git a/tests/skip_to.rs b/tests/skip_to.rs index 20a8a9cc..6b19f8ba 100644 --- a/tests/skip_to.rs +++ b/tests/skip_to.rs @@ -9,4 +9,10 @@ fn basic() { } assert_eq!(i.next(), None); +} + +#[test] +fn empty() { + let bm = RoaringBitmap::new(); + assert_eq!(bm.iter().skip_to(31337).next(), None) } \ No newline at end of file From c7f02c626d2bbcf5895774ea5b7ba175ffc2d5cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Tue, 6 Aug 2024 15:20:03 +0200 Subject: [PATCH 03/15] Make it compile on rust 1.66 --- src/bitmap/store/array_store/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitmap/store/array_store/mod.rs b/src/bitmap/store/array_store/mod.rs index 710a644c..f509bb19 100644 --- a/src/bitmap/store/array_store/mod.rs +++ b/src/bitmap/store/array_store/mod.rs @@ -230,7 +230,7 @@ impl ArrayStore { match self.vec.as_slice().binary_search(&n) { Ok(index) | Err(index) => { if index == self.vec.len() { - Default::default() + [].iter() } else { (&self.vec[index..]).into_iter() } From e8c57e0104f9e5def32b86fe7792ff15d9db1c90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Tue, 13 Aug 2024 12:26:32 +0200 Subject: [PATCH 04/15] Fix clippy warnings --- src/bitmap/iter.rs | 4 ++-- src/bitmap/store/array_store/mod.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bitmap/iter.rs b/src/bitmap/iter.rs index a4415a5e..58894b38 100644 --- a/src/bitmap/iter.rs +++ b/src/bitmap/iter.rs @@ -307,7 +307,7 @@ impl<'a> SkipTo for Iter<'a> { type Iter = SkipToIter<'a>; fn skip_to(self, n: u32) -> Self::Iter { - SkipToIter::new(&self.containers, n) + SkipToIter::new(self.containers, n) } } pub struct SkipToIter<'a> { @@ -332,7 +332,7 @@ impl<'a> SkipToIter<'a> { Self { container_index: 0, containers: &containers[index..], - iter: (&containers[index]).skip_to_iter(v), + iter: containers[index].skip_to_iter(v), } } } diff --git a/src/bitmap/store/array_store/mod.rs b/src/bitmap/store/array_store/mod.rs index f509bb19..a1717f90 100644 --- a/src/bitmap/store/array_store/mod.rs +++ b/src/bitmap/store/array_store/mod.rs @@ -232,7 +232,7 @@ impl ArrayStore { if index == self.vec.len() { [].iter() } else { - (&self.vec[index..]).into_iter() + self.vec[index..].iter() } } } From bb2a249001e9fe712981cd9ca8d29d20d1669229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Thu, 22 Aug 2024 06:51:34 +0200 Subject: [PATCH 05/15] Implement advance_to and iter_from --- src/bitmap/container.rs | 24 +- src/bitmap/iter.rs | 388 ++++++++++++++++------------ src/bitmap/store/array_store/mod.rs | 44 +--- src/bitmap/store/bitmap_store.rs | 61 ----- src/bitmap/store/mod.rs | 13 +- src/lib.rs | 8 - tests/iter_advance_to.rs | 69 +++++ tests/skip_to.rs | 18 -- 8 files changed, 311 insertions(+), 314 deletions(-) create mode 100644 tests/iter_advance_to.rs delete mode 100644 tests/skip_to.rs diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index 9dbb14cf..0af1a558 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -181,13 +181,6 @@ impl Container { } }; } - - pub(crate) fn skip_to_iter(&self, n: u16) -> Iter<'_> { - Iter { - key: self.key, - inner: self.store.skip_to_iter(n), - } - } } impl BitOr<&Container> for &Container { @@ -307,6 +300,10 @@ impl<'a> Iterator for Iter<'a> { fn next(&mut self) -> Option { self.inner.next().map(|i| util::join(self.key, i)) } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } } impl DoubleEndedIterator for Iter<'_> { @@ -323,9 +320,12 @@ impl fmt::Debug for Container { impl<'a> Iter<'a> { pub fn empty() -> Self { - Self { - key: 0, - inner: store::Iter::Empty, - } + Self { key: 0, inner: store::Iter::Empty } } -} \ No newline at end of file +} + +impl AsRef for Container { + fn as_ref(&self) -> &Container { + self + } +} diff --git a/src/bitmap/iter.rs b/src/bitmap/iter.rs index 58894b38..afc86634 100644 --- a/src/bitmap/iter.rs +++ b/src/bitmap/iter.rs @@ -1,136 +1,185 @@ use alloc::vec; -use core::iter; use core::slice; use super::container::Container; -use crate::{NonSortedIntegers, RoaringBitmap, SkipTo}; +use crate::{NonSortedIntegers, RoaringBitmap}; +use crate::bitmap::{container, util}; #[cfg(not(feature = "std"))] use alloc::vec::Vec; -use crate::bitmap::container; -use crate::bitmap::util::split; - -/// An iterator for `RoaringBitmap`. -pub struct Iter<'a> { - // needed to support skip_to - containers: &'a [Container], - inner: iter::Flatten>, - size_hint: u64, -} - -/// An iterator for `RoaringBitmap`. -pub struct IntoIter { - inner: iter::Flatten>, - size_hint: u64, -} +use std::iter::Peekable; + +macro_rules! impl_iter { + ($t:ident,$c:ty, $c_iter:ty, $iter_lt:lifetime $(,$lt:lifetime)?) => { + /// An iterator for `RoaringBitmap`. + pub struct $t$(<$lt>)? { + containers_iter: $c_iter, + iter_front: Option>, + iter_back: Option>, + size_hint: u64, + } -impl Iter<'_> { - fn new(containers: &[Container]) -> Iter { - let size_hint = containers.iter().map(|c| c.len()).sum(); - Iter { containers, inner: containers.iter().flatten(), size_hint } - } -} + impl$(<$lt>)? $t$(<$lt>)? { + fn new(containers: $c) -> Self { + let size_hint = containers.iter().map(|c| c.len()).sum(); + let containers_iter = containers.into_iter(); + Self { + containers_iter, + iter_front: None, + iter_back: None, + size_hint, + } + } + } -impl IntoIter { - fn new(containers: Vec) -> IntoIter { - let size_hint = containers.iter().map(|c| c.len()).sum(); - IntoIter { inner: containers.into_iter().flatten(), size_hint } - } -} + impl$(<$lt>)? Iterator for $t$(<$lt>)? { + type Item = u32; -impl Iterator for Iter<'_> { - type Item = u32; + fn next(&mut self) -> Option { + self.size_hint = self.size_hint.saturating_sub(1); + loop { + if let Some(iter) = &mut self.iter_front { + if let item @ Some(_) = iter.next() { + return item + } + } + if let Some(container) = self.containers_iter.next() { + self.iter_front = Some(container.into_iter()) + } else if let Some(iter) = &mut self.iter_back { + return iter.next() + } else { + return None + } + } + } - fn next(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next() - } + fn size_hint(&self) -> (usize, Option) { + if self.size_hint < usize::MAX as u64 { + (self.size_hint as usize, Some(self.size_hint as usize)) + } else { + (usize::MAX, None) + } + } - fn size_hint(&self) -> (usize, Option) { - if self.size_hint < usize::MAX as u64 { - (self.size_hint as usize, Some(self.size_hint as usize)) - } else { - (usize::MAX, None) + #[inline] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + match (self.iter_front, self.iter_back) { + (Some(iter_front), Some(iter_back)) => { + iter_front.chain(self.containers_iter.flatten()).chain(iter_back).fold(init, f) + }, + (Some(iter_front), None) => { + iter_front.chain(self.containers_iter.flatten()).fold(init, f) + }, + (None, Some(iter_back)) => { + self.containers_iter.flatten().chain(iter_back).fold(init, f) + }, + (None, None) => self.containers_iter.flatten().fold(init, f) + } + } } - } - - #[inline] - fn fold(self, init: B, f: F) -> B - where - Self: Sized, - F: FnMut(B, Self::Item) -> B, - { - self.inner.fold(init, f) - } -} -impl DoubleEndedIterator for Iter<'_> { - fn next_back(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next_back() - } + impl$(<$lt>)? DoubleEndedIterator for $t$(<$lt>)? { + fn next_back(&mut self) -> Option { + self.size_hint = self.size_hint.saturating_sub(1); + loop { + if let Some(iter) = &mut self.iter_back { + if let item @ Some(_) = iter.next_back() { + return item + } + } + if let Some(container) = self.containers_iter.next_back() { + self.iter_back = Some(container.into_iter()) + } else if let Some(iter) = &mut self.iter_front { + return iter.next_back() + } else { + return None + } + } + } - #[inline] - fn rfold(self, init: Acc, fold: Fold) -> Acc - where - Fold: FnMut(Acc, Self::Item) -> Acc, - { - self.inner.rfold(init, fold) - } + #[inline] + fn rfold(self, init: Acc, f: Fold) -> Acc + where + Fold: FnMut(Acc, Self::Item) -> Acc, + { + match (self.iter_front, self.iter_back) { + (Some(iter_front), Some(iter_back)) => { + iter_front.chain(self.containers_iter.flatten()).chain(iter_back).rfold(init, f) + }, + (Some(iter_front), None) => { + iter_front.chain(self.containers_iter.flatten()).rfold(init, f) + }, + (None, Some(iter_back)) => { + self.containers_iter.flatten().chain(iter_back).rfold(init, f) + }, + (None, None) => self.containers_iter.flatten().rfold(init, f) + } + } + } + #[cfg(target_pointer_width = "64")] + impl$(<$lt>)? ExactSizeIterator for $t$(<$lt>)? { + fn len(&self) -> usize { + self.size_hint as usize + } + } + }; } +impl_iter!(Iter, &'a [Container], slice::Iter<'a, Container>, 'a, 'a); +impl_iter!(IntoIter, Vec, vec::IntoIter, 'static); -#[cfg(target_pointer_width = "64")] -impl ExactSizeIterator for Iter<'_> { - fn len(&self) -> usize { - self.size_hint as usize - } +pub struct AdvanceToIter<'a, CI> { + containers_iter: CI, + iter: Peekable>, } -impl Iterator for IntoIter { - type Item = u32; - - fn next(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next() - } - - fn size_hint(&self) -> (usize, Option) { - if self.size_hint < usize::MAX as u64 { - (self.size_hint as usize, Some(self.size_hint as usize)) - } else { - (usize::MAX, None) +#[allow(private_bounds)] +impl<'a, CI> AdvanceToIter<'a, CI> +where + Self: AdvanceIterContainer<'a>, +{ + fn new(containers_iter: CI, iter: container::Iter<'a>, n: u32) -> Self { + let mut result = Self { containers_iter, iter: iter.peekable() }; + if let Some(peek) = result.iter.peek().cloned() { + if peek < n { + let (peek_key, _) = util::split(peek); + let (target_key, _) = util::split(n); + if target_key > peek_key { + while let Some(next_key) = result.advance_container() { + if next_key >= target_key { + break; + } + } + } + while let Some(peek) = result.iter.peek() { + if *peek >= n { + break; + } else { + result.iter.next(); + } + } + } } - } - - #[inline] - fn fold(self, init: B, f: F) -> B - where - Self: Sized, - F: FnMut(B, Self::Item) -> B, - { - self.inner.fold(init, f) + result } } -impl DoubleEndedIterator for IntoIter { - fn next_back(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - self.inner.next_back() - } - - #[inline] - fn rfold(self, init: Acc, fold: Fold) -> Acc - where - Fold: FnMut(Acc, Self::Item) -> Acc, - { - self.inner.rfold(init, fold) - } -} +impl<'a, CI> Iterator for AdvanceToIter<'a, CI> +where + Self: AdvanceIterContainer<'a>, +{ + type Item = u32; -#[cfg(target_pointer_width = "64")] -impl ExactSizeIterator for IntoIter { - fn len(&self) -> usize { - self.size_hint as usize + fn next(&mut self) -> Option { + loop { + if let item @ Some(_) = self.iter.next() { + return item; + } + self.advance_container()?; + } } } @@ -153,6 +202,41 @@ impl RoaringBitmap { pub fn iter(&self) -> Iter { Iter::new(&self.containers) } + + /// Iterator over each value >= `n` stored in the RoaringBitmap, guarantees values are ordered + /// by value. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// use core::iter::FromIterator; + /// + /// let bitmap = (1..3).collect::(); + /// let mut iter = bitmap.iter_from(2); + /// + /// assert_eq!(iter.next(), Some(2)); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn iter_from(&self, n: u32) -> AdvanceToIter<'_, slice::Iter<'_, Container>> { + let (key, _) = util::split(n); + match self.containers.binary_search_by_key(&key, |container| container.key) { + Ok(index) | Err(index) => { + if index == self.containers.len() { + // no container has a key >= key(n) + AdvanceToIter::new([].iter(), container::Iter::empty(), n) + } else { + let containers = &self.containers[index..]; + let iter = (&containers[0]).into_iter(); + if index + 1 < containers.len() - 1 { + AdvanceToIter::new(containers[index + 1..].iter(), iter, n) + } else { + AdvanceToIter::new([].iter(), iter, n) + } + } + } + } + } } impl<'a> IntoIterator for &'a RoaringBitmap { @@ -303,61 +387,45 @@ impl RoaringBitmap { } } -impl<'a> SkipTo for Iter<'a> { - type Iter = SkipToIter<'a>; - - fn skip_to(self, n: u32) -> Self::Iter { - SkipToIter::new(self.containers, n) - } -} -pub struct SkipToIter<'a> { - container_index: usize, - containers: &'a [Container], - iter: container::Iter<'a> +trait AdvanceIterContainer<'a> { + fn advance_container(&mut self) -> Option; } -impl<'a> SkipToIter<'a> { - fn new(containers: &'a [Container], n: u32) -> Self { - let (key, v) = split(n); - match containers.binary_search_by_key(&key, |e| e.key) { - Ok(index) | Err(index) => { - if index == containers.len() { - // there are no containers with a key <= n. Return empty - Self { - container_index: 0, - containers: &[], - iter: container::Iter::empty(), - } +macro_rules! impl_advance_iter_container { + ($lt:lifetime,$ty:ty) => { + impl<$lt> AdvanceIterContainer<$lt> for AdvanceToIter<$lt, $ty> { + fn advance_container(&mut self) -> Option { + if let Some(container) = self.containers_iter.next() { + let result = container.key; + self.iter = container.into_iter().peekable(); + Some(result) } else { - Self { - container_index: 0, - containers: &containers[index..], - iter: containers[index].skip_to_iter(v), - } + None } } } - } + }; } - -impl Iterator for SkipToIter<'_> { - type Item = u32; - - fn next(&mut self) -> Option { - if self.containers.is_empty() { - return None - } - let next = self.iter.next(); - if next.is_some() { - next - } else if self.container_index == self.containers.len() - 1 { - None - } else { - self.container_index += 1; - self.iter = (&self.containers[self.container_index]).into_iter(); - // we can call self.iter.next() directly here if we know that a - // container can't be empty. Do we? - self.next() - } - } -} \ No newline at end of file +impl_advance_iter_container!('a, slice::Iter<'a, Container>); +impl_advance_iter_container!('a ,vec::IntoIter); + +macro_rules! impl_advance_to { + ($ty:ty, $ret:ty $(,$lt:lifetime)? ) => { + impl$(<$lt>)? $ty { + /// Advance the iterator to the first position where the item has a value >= `n` + pub fn advance_to(mut self, n: u32) -> $ret { + if let Some(iter_front) = self.iter_front { + AdvanceToIter::new(self.containers_iter, iter_front, n) + } else { + if let Some(container) = self.containers_iter.next() { + AdvanceToIter::new(self.containers_iter, container.into_iter(), n) + } else { + AdvanceToIter::new(self.containers_iter, container::Iter::empty(), n) + } + } + } + } + }; +} +impl_advance_to!(Iter<'a>, AdvanceToIter<'a, slice::Iter<'a, Container>>, 'a); +impl_advance_to!(IntoIter, AdvanceToIter<'static, vec::IntoIter>); diff --git a/src/bitmap/store/array_store/mod.rs b/src/bitmap/store/array_store/mod.rs index a1717f90..541b38f6 100644 --- a/src/bitmap/store/array_store/mod.rs +++ b/src/bitmap/store/array_store/mod.rs @@ -11,9 +11,9 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitXor, RangeInclusive, Sub, SubAss #[cfg(not(feature = "std"))] use alloc::vec::Vec; +use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; #[cfg(not(feature = "std"))] use alloc::boxed::Box; -use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; #[derive(Clone, Eq, PartialEq)] pub struct ArrayStore { @@ -226,18 +226,6 @@ impl ArrayStore { self.vec.iter() } - pub fn skip_to_iter(&self, n: u16) -> core::slice::Iter { - match self.vec.as_slice().binary_search(&n) { - Ok(index) | Err(index) => { - if index == self.vec.len() { - [].iter() - } else { - self.vec[index..].iter() - } - } - } - } - pub fn into_iter(self) -> alloc::vec::IntoIter { self.vec.into_iter() } @@ -445,36 +433,6 @@ mod tests { } } - #[test] - fn test_skip_to_iter() { - let mut store = ArrayStore::new(); - store.insert_range(0..=16); - store.insert_range(32..=64); - - // test direct hits - let mut iter = store.skip_to_iter(32); - for n in 32..=64 { - assert_eq!(iter.next(), Some(&n)); - } - - // test index in gap - let mut i = store.skip_to_iter(17); - assert_eq!(i.next(), Some(&32)); - - // test index after end - let mut i = store.skip_to_iter(65); - assert_eq!(i.next(), None); - - // test last value - let mut i = store.skip_to_iter(64); - assert_eq!(i.next(), Some(&64)); - - // test first value - let mut i = store.skip_to_iter(0); - assert_eq!(i.next(), Some(&0)); - - } - #[test] #[allow(clippy::reversed_empty_ranges)] fn test_array_insert_invalid_range() { diff --git a/src/bitmap/store/bitmap_store.rs b/src/bitmap/store/bitmap_store.rs index fad87287..f349a2aa 100644 --- a/src/bitmap/store/bitmap_store.rs +++ b/src/bitmap/store/bitmap_store.rs @@ -304,10 +304,6 @@ impl BitmapStore { BitmapIter::new(&self.bits) } - pub(crate) fn skip_to_iter(&self, n: u16) -> BitmapIter<&[u64; BITMAP_LENGTH]> { - BitmapIter::new_from(&self.bits, n) - } - pub fn into_iter(self) -> BitmapIter> { BitmapIter::new(self.bits) } @@ -425,34 +421,6 @@ impl> BitmapIter { bits, } } - - fn new_from(bits: B, n: u16) -> BitmapIter { - let key = key(n); - let mut value = bits.borrow()[key]; - - // There is probably a way to calculate value much more efficiently - // using bit(n) and some bit magic, but I haven't figured that one out - let mut prev_value; - loop { - if value == 0 { - prev_value = 0; - break; - } - let index = value.trailing_zeros() as usize; - prev_value = value; - value &= value - 1; - if (64 * key + index) as u16 >= n { - break; - } - } - BitmapIter { - key, - value: prev_value, - key_back: BITMAP_LENGTH - 1, - value_back: bits.borrow()[BITMAP_LENGTH - 1], - bits, - } - } } impl> Iterator for BitmapIter { @@ -588,35 +556,6 @@ impl BitXorAssign<&ArrayStore> for BitmapStore { mod tests { use super::*; - #[test] - fn test_skip_to_iter() { - let mut store = BitmapStore::new(); - store.insert_range(0..=16); - store.insert_range(32..=64); - - // test direct hits - let mut iter = store.skip_to_iter(32); - for n in 32..=64 { - assert_eq!(iter.next(), Some(n)); - } - - // test index in gap - let mut i = store.skip_to_iter(17); - assert_eq!(i.next(), Some(32)); - - // test index after end - let mut i = store.skip_to_iter(65); - assert_eq!(i.next(), None); - - // test last value - let mut i = store.skip_to_iter(64); - assert_eq!(i.next(), Some(64)); - - // test first value - let mut i = store.skip_to_iter(0); - assert_eq!(i.next(), Some(0)); - - } #[test] fn test_bitmap_remove_smallest() { let mut store = BitmapStore::new(); diff --git a/src/bitmap/store/mod.rs b/src/bitmap/store/mod.rs index 26f738b9..f1372f19 100644 --- a/src/bitmap/store/mod.rs +++ b/src/bitmap/store/mod.rs @@ -30,7 +30,7 @@ pub enum Iter<'a> { Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), BitmapOwned(BitmapIter>), - Empty + Empty, } impl Store { @@ -219,17 +219,6 @@ impl Store { Bitmap(_) => self.clone(), } } - - pub(crate) fn skip_to_iter(&self, n: u16) -> Iter<'_> { - match &self { - Array(a) => { - Iter::Array(a.skip_to_iter(n)) - } - Bitmap(bm) => { - Iter::BitmapBorrowed(bm.skip_to_iter(n)) - } - } - } } impl Default for Store { diff --git a/src/lib.rs b/src/lib.rs index af7bfd4e..b4cbf5a5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,11 +94,3 @@ pub trait MultiOps: IntoIterator { /// The `symmetric difference` between all elements. fn symmetric_difference(self) -> Self::Output; } - -/// Get an iterator that yields values >= `n` -pub trait SkipTo { - /// The type of iterator that will be returned - type Iter: Iterator; - /// Get an iterator that yields values >= `n` - fn skip_to(self, n: u32) -> Self::Iter; -} diff --git a/tests/iter_advance_to.rs b/tests/iter_advance_to.rs new file mode 100644 index 00000000..7dc0b679 --- /dev/null +++ b/tests/iter_advance_to.rs @@ -0,0 +1,69 @@ +use roaring::RoaringBitmap; + +#[test] +fn iter_basic() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.iter().advance_to(10); + for n in 11..=14 { + assert_eq!(i.next(), Some(n)) + } + assert_eq!(i.next(), None); +} + +#[test] +fn iter_advance_past_end() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.iter().advance_to(15); + assert_eq!(i.next(), None); +} + +#[test] +fn iter_multi_container() { + let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.iter().advance_to(3); + assert_eq!(i.next(), Some(3)); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.next(), Some(100001)); + assert_eq!(i.next(), None); +} + +#[test] +fn iter_empty() { + let bm = RoaringBitmap::new(); + assert_eq!(bm.iter().advance_to(31337).next(), None) +} + +#[test] +fn into_iter_basic() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.into_iter().advance_to(10); + for n in 11..=14 { + assert_eq!(i.next(), Some(n)) + } + assert_eq!(i.next(), None); +} + +#[test] +fn into_iter_multi_container() { + let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.into_iter().advance_to(3); + assert_eq!(i.next(), Some(3)); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.next(), Some(100001)); + assert_eq!(i.next(), None); +} + +#[test] +fn into_iter_empty() { + let bm = RoaringBitmap::new(); + assert_eq!(bm.into_iter().advance_to(31337).next(), None) +} + +#[test] +fn iter_from() { + let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.iter_from(99999); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.next(), Some(100001)); + assert_eq!(i.next(), None); +} diff --git a/tests/skip_to.rs b/tests/skip_to.rs deleted file mode 100644 index 6b19f8ba..00000000 --- a/tests/skip_to.rs +++ /dev/null @@ -1,18 +0,0 @@ -use roaring::{RoaringBitmap, SkipTo}; - -#[test] -fn basic() { - let bm = RoaringBitmap::from([1,2,3,4,11,12,13,14]); - let mut i = bm.iter().skip_to(10); - for n in 11..=14 { - assert_eq!(i.next(), Some(n)) - } - assert_eq!(i.next(), None); - -} - -#[test] -fn empty() { - let bm = RoaringBitmap::new(); - assert_eq!(bm.iter().skip_to(31337).next(), None) -} \ No newline at end of file From d25a8fe8c8f6566c94666016538e831620acbdc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Thu, 22 Aug 2024 09:23:31 +0200 Subject: [PATCH 06/15] Fix advance_to when coming from an iterator with an iter_back --- roaring/src/bitmap/iter.rs | 52 +++++++++++++++++++++++--------- roaring/tests/iter_advance_to.rs | 10 ++++++ 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index afc86634..fa599e64 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -133,7 +133,8 @@ impl_iter!(IntoIter, Vec, vec::IntoIter, 'static); pub struct AdvanceToIter<'a, CI> { containers_iter: CI, - iter: Peekable>, + iter_front: Peekable>, + iter_back: Option>>, } #[allow(private_bounds)] @@ -141,9 +142,23 @@ impl<'a, CI> AdvanceToIter<'a, CI> where Self: AdvanceIterContainer<'a>, { - fn new(containers_iter: CI, iter: container::Iter<'a>, n: u32) -> Self { - let mut result = Self { containers_iter, iter: iter.peekable() }; - if let Some(peek) = result.iter.peek().cloned() { + fn new( + containers_iter: CI, + iter_front: Option>, + iter_back: Option>, + n: u32, + ) -> Self { + let mut result = Self { + containers_iter, + iter_front: container::Iter::empty().peekable(), + iter_back: iter_back.map(|o| o.peekable()), + }; + if let Some(iter_front) = iter_front { + result.iter_front = iter_front.peekable(); + } else { + result.advance_container(); + } + if let Some(peek) = result.iter_front.peek().cloned() { if peek < n { let (peek_key, _) = util::split(peek); let (target_key, _) = util::split(n); @@ -154,11 +169,11 @@ where } } } - while let Some(peek) = result.iter.peek() { + while let Some(peek) = result.iter_front.peek() { if *peek >= n { break; } else { - result.iter.next(); + result.iter_front.next(); } } } @@ -175,7 +190,7 @@ where fn next(&mut self) -> Option { loop { - if let item @ Some(_) = self.iter.next() { + if let item @ Some(_) = self.iter_front.next() { return item; } self.advance_container()?; @@ -224,14 +239,14 @@ impl RoaringBitmap { Ok(index) | Err(index) => { if index == self.containers.len() { // no container has a key >= key(n) - AdvanceToIter::new([].iter(), container::Iter::empty(), n) + AdvanceToIter::new([].iter(), None, None, n) } else { let containers = &self.containers[index..]; let iter = (&containers[0]).into_iter(); if index + 1 < containers.len() - 1 { - AdvanceToIter::new(containers[index + 1..].iter(), iter, n) + AdvanceToIter::new(containers[index + 1..].iter(), Some(iter), None, n) } else { - AdvanceToIter::new([].iter(), iter, n) + AdvanceToIter::new([].iter(), Some(iter), None, n) } } } @@ -397,8 +412,17 @@ macro_rules! impl_advance_iter_container { fn advance_container(&mut self) -> Option { if let Some(container) = self.containers_iter.next() { let result = container.key; - self.iter = container.into_iter().peekable(); + self.iter_front = container.into_iter().peekable(); Some(result) + } else if let Some(iter_back) = &mut self.iter_back { + std::mem::swap(iter_back, &mut self.iter_front); + self.iter_back = None; + if let Some(v) = self.iter_front.peek().cloned() { + let (key, _) = util::split(v); + Some(key) + } else { + None + } } else { None } @@ -415,12 +439,12 @@ macro_rules! impl_advance_to { /// Advance the iterator to the first position where the item has a value >= `n` pub fn advance_to(mut self, n: u32) -> $ret { if let Some(iter_front) = self.iter_front { - AdvanceToIter::new(self.containers_iter, iter_front, n) + AdvanceToIter::new(self.containers_iter, Some(iter_front), self.iter_back, n) } else { if let Some(container) = self.containers_iter.next() { - AdvanceToIter::new(self.containers_iter, container.into_iter(), n) + AdvanceToIter::new(self.containers_iter, Some(container.into_iter()), self.iter_back, n) } else { - AdvanceToIter::new(self.containers_iter, container::Iter::empty(), n) + AdvanceToIter::new(self.containers_iter, Some(container::Iter::empty()), self.iter_back, n) } } } diff --git a/roaring/tests/iter_advance_to.rs b/roaring/tests/iter_advance_to.rs index 7dc0b679..7aecc3d7 100644 --- a/roaring/tests/iter_advance_to.rs +++ b/roaring/tests/iter_advance_to.rs @@ -67,3 +67,13 @@ fn iter_from() { assert_eq!(i.next(), Some(100001)); assert_eq!(i.next(), None); } + +#[test] +fn advance_to_with_tail_iter() { + let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.iter(); + i.next_back(); + let mut i = i.advance_to(100000); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.next(), None); +} From d3f9cdbadcd54df35d3a06f1f829c711404abf99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Thu, 22 Aug 2024 11:00:33 +0200 Subject: [PATCH 07/15] Make it compile on 1.66 --- roaring/src/bitmap/iter.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index fa599e64..a63c0241 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -137,7 +137,6 @@ pub struct AdvanceToIter<'a, CI> { iter_back: Option>>, } -#[allow(private_bounds)] impl<'a, CI> AdvanceToIter<'a, CI> where Self: AdvanceIterContainer<'a>, @@ -402,7 +401,7 @@ impl RoaringBitmap { } } -trait AdvanceIterContainer<'a> { +pub trait AdvanceIterContainer<'a> { fn advance_container(&mut self) -> Option; } From 26997036815a796bbb4709196e6c17449c0da877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Wed, 25 Sep 2024 16:56:11 +0200 Subject: [PATCH 08/15] Remove Empty variant in bitmap::store::Iter --- roaring/src/bitmap/container.rs | 2 +- roaring/src/bitmap/store/mod.rs | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 0af1a558..391940fb 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -320,7 +320,7 @@ impl fmt::Debug for Container { impl<'a> Iter<'a> { pub fn empty() -> Self { - Self { key: 0, inner: store::Iter::Empty } + Self { key: 0, inner: store::Iter::Vec(vec![].into_iter()) } } } diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index f1372f19..bb0d5822 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -30,7 +30,6 @@ pub enum Iter<'a> { Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), BitmapOwned(BitmapIter>), - Empty, } impl Store { @@ -507,7 +506,6 @@ impl<'a> Iterator for Iter<'a> { Iter::Vec(inner) => inner.next(), Iter::BitmapBorrowed(inner) => inner.next(), Iter::BitmapOwned(inner) => inner.next(), - Iter::Empty => None, } } } @@ -519,7 +517,6 @@ impl DoubleEndedIterator for Iter<'_> { Iter::Vec(inner) => inner.next_back(), Iter::BitmapBorrowed(inner) => inner.next_back(), Iter::BitmapOwned(inner) => inner.next_back(), - Iter::Empty => None, } } } From 65b5330cb83d58278ffb4d3abe861ae31689bb19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Wed, 25 Sep 2024 16:56:50 +0200 Subject: [PATCH 09/15] Revert change in imports --- roaring/src/bitmap/store/array_store/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/roaring/src/bitmap/store/array_store/mod.rs b/roaring/src/bitmap/store/array_store/mod.rs index 541b38f6..883db31f 100644 --- a/roaring/src/bitmap/store/array_store/mod.rs +++ b/roaring/src/bitmap/store/array_store/mod.rs @@ -11,10 +11,11 @@ use core::ops::{BitAnd, BitAndAssign, BitOr, BitXor, RangeInclusive, Sub, SubAss #[cfg(not(feature = "std"))] use alloc::vec::Vec; -use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; #[cfg(not(feature = "std"))] use alloc::boxed::Box; +use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; + #[derive(Clone, Eq, PartialEq)] pub struct ArrayStore { vec: Vec, From 9a325bce72161e4c1bf090966a627f8f0438f63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Wed, 25 Sep 2024 17:02:39 +0200 Subject: [PATCH 10/15] Refactor iterators --- roaring/src/bitmap/iter.rs | 704 +++++++++++++++++++------------ roaring/tests/iter_advance_to.rs | 34 +- 2 files changed, 452 insertions(+), 286 deletions(-) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index a63c0241..856ef3a1 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -1,204 +1,105 @@ -use alloc::vec; -use core::slice; - use super::container::Container; use crate::{NonSortedIntegers, RoaringBitmap}; -use crate::bitmap::{container, util}; +use crate::bitmap::container; #[cfg(not(feature = "std"))] use alloc::vec::Vec; -use std::iter::Peekable; - -macro_rules! impl_iter { - ($t:ident,$c:ty, $c_iter:ty, $iter_lt:lifetime $(,$lt:lifetime)?) => { - /// An iterator for `RoaringBitmap`. - pub struct $t$(<$lt>)? { - containers_iter: $c_iter, - iter_front: Option>, - iter_back: Option>, - size_hint: u64, - } - - impl$(<$lt>)? $t$(<$lt>)? { - fn new(containers: $c) -> Self { - let size_hint = containers.iter().map(|c| c.len()).sum(); - let containers_iter = containers.into_iter(); - Self { - containers_iter, - iter_front: None, - iter_back: None, - size_hint, - } - } - } - - impl$(<$lt>)? Iterator for $t$(<$lt>)? { - type Item = u32; - - fn next(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - loop { - if let Some(iter) = &mut self.iter_front { - if let item @ Some(_) = iter.next() { - return item - } - } - if let Some(container) = self.containers_iter.next() { - self.iter_front = Some(container.into_iter()) - } else if let Some(iter) = &mut self.iter_back { - return iter.next() - } else { - return None - } - } - } - - fn size_hint(&self) -> (usize, Option) { - if self.size_hint < usize::MAX as u64 { - (self.size_hint as usize, Some(self.size_hint as usize)) - } else { - (usize::MAX, None) - } - } - - #[inline] - fn fold(self, init: B, f: F) -> B - where - Self: Sized, - F: FnMut(B, Self::Item) -> B, - { - match (self.iter_front, self.iter_back) { - (Some(iter_front), Some(iter_back)) => { - iter_front.chain(self.containers_iter.flatten()).chain(iter_back).fold(init, f) - }, - (Some(iter_front), None) => { - iter_front.chain(self.containers_iter.flatten()).fold(init, f) - }, - (None, Some(iter_back)) => { - self.containers_iter.flatten().chain(iter_back).fold(init, f) - }, - (None, None) => self.containers_iter.flatten().fold(init, f) - } - } - } - impl$(<$lt>)? DoubleEndedIterator for $t$(<$lt>)? { - fn next_back(&mut self) -> Option { - self.size_hint = self.size_hint.saturating_sub(1); - loop { - if let Some(iter) = &mut self.iter_back { - if let item @ Some(_) = iter.next_back() { - return item - } - } - if let Some(container) = self.containers_iter.next_back() { - self.iter_back = Some(container.into_iter()) - } else if let Some(iter) = &mut self.iter_front { - return iter.next_back() - } else { - return None - } - } - } +use alloc::collections::vec_deque::VecDeque; +use core::iter::Peekable; - #[inline] - fn rfold(self, init: Acc, f: Fold) -> Acc - where - Fold: FnMut(Acc, Self::Item) -> Acc, - { - match (self.iter_front, self.iter_back) { - (Some(iter_front), Some(iter_back)) => { - iter_front.chain(self.containers_iter.flatten()).chain(iter_back).rfold(init, f) - }, - (Some(iter_front), None) => { - iter_front.chain(self.containers_iter.flatten()).rfold(init, f) - }, - (None, Some(iter_back)) => { - self.containers_iter.flatten().chain(iter_back).rfold(init, f) - }, - (None, None) => self.containers_iter.flatten().rfold(init, f) - } - } - } - #[cfg(target_pointer_width = "64")] - impl$(<$lt>)? ExactSizeIterator for $t$(<$lt>)? { - fn len(&self) -> usize { - self.size_hint as usize - } - } - }; -} -impl_iter!(Iter, &'a [Container], slice::Iter<'a, Container>, 'a, 'a); -impl_iter!(IntoIter, Vec, vec::IntoIter, 'static); - -pub struct AdvanceToIter<'a, CI> { - containers_iter: CI, - iter_front: Peekable>, +use iter_inner::IterInternal; +/// An iterator for `RoaringBitmap`. +pub struct Iter<'a> { + containers: &'a [Container], + iter_front: Option>>, iter_back: Option>>, + size_hint: u64, } +impl<'a> Iter<'a> { + fn new(containers: &'a [Container]) -> Self { + let size_hint = containers.iter().map(|c| c.len()).sum(); + Self { containers, iter_front: None, iter_back: None, size_hint } + } -impl<'a, CI> AdvanceToIter<'a, CI> -where - Self: AdvanceIterContainer<'a>, -{ - fn new( - containers_iter: CI, - iter_front: Option>, - iter_back: Option>, - n: u32, - ) -> Self { - let mut result = Self { - containers_iter, - iter_front: container::Iter::empty().peekable(), - iter_back: iter_back.map(|o| o.peekable()), - }; - if let Some(iter_front) = iter_front { - result.iter_front = iter_front.peekable(); + /// Advance the iterator to the first position where the item has a value >= `n` + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// use core::iter::FromIterator; + /// + /// let bitmap = (1..3).collect::(); + /// let mut iter = bitmap.iter(); + /// iter.advance_to(2); + /// + /// assert_eq!(iter.next(), Some(2)); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn advance_to(&mut self, n: u32) { + self.advance_to_inner(n); + } +} +impl<'a> Iterator for Iter<'a> { + type Item = u32; + fn next(&mut self) -> Option { + self.next_inner() + } + fn size_hint(&self) -> (usize, Option) { + if self.size_hint < usize::MAX as u64 { + (self.size_hint as usize, Some(self.size_hint as usize)) } else { - result.advance_container(); - } - if let Some(peek) = result.iter_front.peek().cloned() { - if peek < n { - let (peek_key, _) = util::split(peek); - let (target_key, _) = util::split(n); - if target_key > peek_key { - while let Some(next_key) = result.advance_container() { - if next_key >= target_key { - break; - } - } - } - while let Some(peek) = result.iter_front.peek() { - if *peek >= n { - break; - } else { - result.iter_front.next(); - } - } - } + (usize::MAX, None) } - result + } + #[inline] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.fold_inner(init, f) } } +impl<'a> DoubleEndedIterator for Iter<'a> { + fn next_back(&mut self) -> Option { + self.next_back_inner() + } -impl<'a, CI> Iterator for AdvanceToIter<'a, CI> -where - Self: AdvanceIterContainer<'a>, -{ - type Item = u32; - - fn next(&mut self) -> Option { - loop { - if let item @ Some(_) = self.iter_front.next() { - return item; - } - self.advance_container()?; - } + #[inline] + fn rfold(self, init: Acc, f: Fold) -> Acc + where + Fold: FnMut(Acc, Self::Item) -> Acc, + { + self.rfold_inner(init, f) } } +#[cfg(target_pointer_width = "64")] +impl<'a> ExactSizeIterator for Iter<'a> { + fn len(&self) -> usize { + self.size_hint as usize + } +} +/// An iterator for `RoaringBitmap`. +pub struct IntoIter { + containers: VecDeque, + iter_front: Option>>, + iter_back: Option>>, + size_hint: u64, +} +impl IntoIter { + fn new(containers: Vec) -> Self { + let size_hint = containers.iter().map(|c| c.len()).sum(); + Self { + containers: VecDeque::from(containers), + iter_front: None, + iter_back: None, + size_hint, + } + } -impl RoaringBitmap { - /// Iterator over each value stored in the RoaringBitmap, guarantees values are ordered by value. + /// Advance the iterator to the first position where the item has a value >= `n` /// /// # Examples /// @@ -207,18 +108,58 @@ impl RoaringBitmap { /// use core::iter::FromIterator; /// /// let bitmap = (1..3).collect::(); - /// let mut iter = bitmap.iter(); + /// let mut iter = bitmap.into_iter(); + /// iter.advance_to(2); /// - /// assert_eq!(iter.next(), Some(1)); /// assert_eq!(iter.next(), Some(2)); /// assert_eq!(iter.next(), None); /// ``` - pub fn iter(&self) -> Iter { - Iter::new(&self.containers) + pub fn advance_to(&mut self, n: u32) { + self.advance_to_inner(n) + } +} +impl Iterator for IntoIter { + type Item = u32; + fn next(&mut self) -> Option { + self.next_inner() + } + fn size_hint(&self) -> (usize, Option) { + if self.size_hint < usize::MAX as u64 { + (self.size_hint as usize, Some(self.size_hint as usize)) + } else { + (usize::MAX, None) + } + } + #[inline] + fn fold(self, init: B, f: F) -> B + where + Self: Sized, + F: FnMut(B, Self::Item) -> B, + { + self.fold_inner(init, f) } +} +impl DoubleEndedIterator for IntoIter { + fn next_back(&mut self) -> Option { + self.next_back_inner() + } + #[inline] + fn rfold(self, init: Acc, f: Fold) -> Acc + where + Fold: FnMut(Acc, Self::Item) -> Acc, + { + self.rfold_inner(init, f) + } +} +#[cfg(target_pointer_width = "64")] +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.size_hint as usize + } +} - /// Iterator over each value >= `n` stored in the RoaringBitmap, guarantees values are ordered - /// by value. +impl RoaringBitmap { + /// Iterator over each value stored in the RoaringBitmap, guarantees values are ordered by value. /// /// # Examples /// @@ -227,56 +168,35 @@ impl RoaringBitmap { /// use core::iter::FromIterator; /// /// let bitmap = (1..3).collect::(); - /// let mut iter = bitmap.iter_from(2); + /// let mut iter = bitmap.iter(); /// + /// assert_eq!(iter.next(), Some(1)); /// assert_eq!(iter.next(), Some(2)); /// assert_eq!(iter.next(), None); /// ``` - pub fn iter_from(&self, n: u32) -> AdvanceToIter<'_, slice::Iter<'_, Container>> { - let (key, _) = util::split(n); - match self.containers.binary_search_by_key(&key, |container| container.key) { - Ok(index) | Err(index) => { - if index == self.containers.len() { - // no container has a key >= key(n) - AdvanceToIter::new([].iter(), None, None, n) - } else { - let containers = &self.containers[index..]; - let iter = (&containers[0]).into_iter(); - if index + 1 < containers.len() - 1 { - AdvanceToIter::new(containers[index + 1..].iter(), Some(iter), None, n) - } else { - AdvanceToIter::new([].iter(), Some(iter), None, n) - } - } - } - } + pub fn iter(&self) -> Iter { + Iter::new(&self.containers) } } - impl<'a> IntoIterator for &'a RoaringBitmap { type Item = u32; type IntoIter = Iter<'a>; - fn into_iter(self) -> Iter<'a> { self.iter() } } - impl IntoIterator for RoaringBitmap { type Item = u32; type IntoIter = IntoIter; - fn into_iter(self) -> IntoIter { IntoIter::new(self.containers) } } - impl From<[u32; N]> for RoaringBitmap { fn from(arr: [u32; N]) -> Self { RoaringBitmap::from_iter(arr) } } - impl FromIterator for RoaringBitmap { fn from_iter>(iterator: I) -> RoaringBitmap { let mut rb = RoaringBitmap::new(); @@ -284,7 +204,6 @@ impl FromIterator for RoaringBitmap { rb } } - impl<'a> FromIterator<&'a u32> for RoaringBitmap { fn from_iter>(iterator: I) -> RoaringBitmap { let mut rb = RoaringBitmap::new(); @@ -292,7 +211,6 @@ impl<'a> FromIterator<&'a u32> for RoaringBitmap { rb } } - impl Extend for RoaringBitmap { fn extend>(&mut self, iterator: I) { for value in iterator { @@ -300,7 +218,6 @@ impl Extend for RoaringBitmap { } } } - impl<'a> Extend<&'a u32> for RoaringBitmap { fn extend>(&mut self, iterator: I) { for value in iterator { @@ -308,7 +225,6 @@ impl<'a> Extend<&'a u32> for RoaringBitmap { } } } - impl RoaringBitmap { /// Create the set from a sorted iterator. Values must be sorted and deduplicated. /// @@ -345,7 +261,6 @@ impl RoaringBitmap { let mut rb = RoaringBitmap::new(); rb.append(iterator).map(|_| rb) } - /// Extend the set with a sorted iterator. /// /// The values of the iterator must be ordered and strictly greater than the greatest value @@ -369,24 +284,16 @@ impl RoaringBitmap { &mut self, iterator: I, ) -> Result { - // Name shadowed to prevent accidentally referencing the param let mut iterator = iterator.into_iter(); - let mut prev = match (iterator.next(), self.max()) { (None, _) => return Ok(0), (Some(first), Some(max)) if first <= max => { - return Err(NonSortedIntegers { valid_until: 0 }) + return Err(NonSortedIntegers { valid_until: 0 }); } (Some(first), _) => first, }; - - // It is now guaranteed that so long as the values of the iterator are - // monotonically increasing they must also be the greatest in the set. - self.push_unchecked(prev); - let mut count = 1; - for value in iterator { if value <= prev { return Err(NonSortedIntegers { valid_until: count }); @@ -396,59 +303,318 @@ impl RoaringBitmap { count += 1; } } - Ok(count) } } -pub trait AdvanceIterContainer<'a> { - fn advance_container(&mut self) -> Option; -} +mod iter_inner { + use crate::bitmap::container::Container; + use crate::bitmap::{container, util, IntoIter, Iter}; + use core::iter::Peekable; + use core::slice; -macro_rules! impl_advance_iter_container { - ($lt:lifetime,$ty:ty) => { - impl<$lt> AdvanceIterContainer<$lt> for AdvanceToIter<$lt, $ty> { - fn advance_container(&mut self) -> Option { - if let Some(container) = self.containers_iter.next() { - let result = container.key; - self.iter_front = container.into_iter().peekable(); - Some(result) - } else if let Some(iter_back) = &mut self.iter_back { - std::mem::swap(iter_back, &mut self.iter_front); - self.iter_back = None; - if let Some(v) = self.iter_front.peek().cloned() { - let (key, _) = util::split(v); - Some(key) - } else { - None + // To get rid of clippy complex type warning + pub(super) type DecomposeInnerIter = Option>; + pub(super) trait IterInternal { + type InnerIter: Iterator + DoubleEndedIterator; + type Container: IntoIterator + AsRef; + + type ContainerIterator: Iterator + DoubleEndedIterator; + type IntoContainerIterator: IntoIterator; + + fn pop_container_front(&mut self) -> Option; + fn pop_container_back(&mut self) -> Option; + + fn drain_containers_until(&mut self, index: usize); + + fn clear_containers(&mut self); + + fn find_container(&self, key: u16) -> Option; + + fn iter_front_mut(&mut self) -> &mut Option>; + fn iter_back_mut(&mut self) -> &mut Option>; + + fn dec_size_hint(&mut self); + + fn decompose( + self, + ) -> ( + DecomposeInnerIter, + DecomposeInnerIter, + Self::IntoContainerIterator, + ); + + #[inline] + fn next_inner(&mut self) -> Option { + self.dec_size_hint(); + loop { + if let Some(iter) = self.iter_front_mut() { + if let item @ Some(_) = iter.next() { + return item; } + } + if self.advance_container().is_some() { + continue; } else { - None + return None; } } } - }; -} -impl_advance_iter_container!('a, slice::Iter<'a, Container>); -impl_advance_iter_container!('a ,vec::IntoIter); - -macro_rules! impl_advance_to { - ($ty:ty, $ret:ty $(,$lt:lifetime)? ) => { - impl$(<$lt>)? $ty { - /// Advance the iterator to the first position where the item has a value >= `n` - pub fn advance_to(mut self, n: u32) -> $ret { - if let Some(iter_front) = self.iter_front { - AdvanceToIter::new(self.containers_iter, Some(iter_front), self.iter_back, n) + + #[inline] + fn next_back_inner(&mut self) -> Option { + self.dec_size_hint(); + loop { + if let Some(iter) = self.iter_back_mut() { + if let item @ Some(_) = iter.next_back() { + return item; + } + } + if self.advance_container_back().is_some() { + continue; + } else { + return None; + } + } + } + + fn advance_to_inner(&mut self, n: u32) { + let (key, _) = util::split(n); + fn advance_iter(iter: &mut Peekable>, n: u32) -> Option { + while let Some(next) = iter.peek().cloned() { + if next < n { + iter.next(); } else { - if let Some(container) = self.containers_iter.next() { - AdvanceToIter::new(self.containers_iter, Some(container.into_iter()), self.iter_back, n) - } else { - AdvanceToIter::new(self.containers_iter, Some(container::Iter::empty()), self.iter_back, n) - } + return Some(next); + } + } + None + } + if let Some(index) = self.find_container(key) { + self.drain_containers_until(index); + let container = self.pop_container_front().expect("bug!"); + let mut iter = container.into_iter().peekable(); + advance_iter(&mut iter, n); + *self.iter_front_mut() = Some(iter); + } else { + // there are no containers with given key. Look in iter_front and iter_back. + self.clear_containers(); + if let Some(iter_front) = self.iter_front_mut() { + if iter_front.peek().cloned().map(|n| util::split(n).0) == Some(key) { + advance_iter(iter_front, n); + return; + } + } + if let Some(iter_back) = self.iter_back_mut() { + if iter_back.peek().cloned().map(|n| util::split(n).0) == Some(key) { + advance_iter(iter_back, n); + *self.iter_front_mut() = None; + return; } } + *self.iter_front_mut() = None; + *self.iter_back_mut() = None; } - }; + } + + #[inline] + fn fold_inner(self, init: B, f: F) -> B + where + F: FnMut(B, u32) -> B, + Self: Sized, + { + let (iter_front, iter_back, containers) = self.decompose(); + match (iter_front, iter_back) { + (Some(iter_front), Some(iter_back)) => iter_front + .chain(containers.into_iter().flatten()) + .chain(iter_back) + .fold(init, f), + (Some(iter_front), None) => { + iter_front.chain(containers.into_iter().flatten()).fold(init, f) + } + (None, Some(iter_back)) => { + containers.into_iter().flatten().chain(iter_back).fold(init, f) + } + (None, None) => containers.into_iter().flatten().fold(init, f), + } + } + + fn rfold_inner(self, init: Acc, f: Fold) -> Acc + where + Fold: FnMut(Acc, u32) -> Acc, + Self: Sized, + { + let (iter_front, iter_back, containers) = self.decompose(); + match (iter_front, iter_back) { + (Some(iter_front), Some(iter_back)) => iter_front + .chain(containers.into_iter().flatten()) + .chain(iter_back) + .rfold(init, f), + (Some(iter_front), None) => { + iter_front.chain(containers.into_iter().flatten()).rfold(init, f) + } + (None, Some(iter_back)) => { + containers.into_iter().flatten().chain(iter_back).rfold(init, f) + } + (None, None) => containers.into_iter().flatten().rfold(init, f), + } + } + + fn advance_container(&mut self) -> Option { + if let Some(container) = self.pop_container_front() { + let result = container.as_ref().key; + *self.iter_front_mut() = Some(container.into_iter().peekable()); + Some(result) + } else if self.iter_back_mut().is_some() { + let mut iter_none = None; + core::mem::swap(&mut iter_none, self.iter_back_mut()); + core::mem::swap(&mut iter_none, self.iter_front_mut()); + *self.iter_back_mut() = None; + if let Some(v) = self.iter_front_mut().as_mut().and_then(|i| i.peek().cloned()) { + let (key, _) = util::split(v); + Some(key) + } else { + None + } + } else { + None + } + } + + fn advance_container_back(&mut self) -> Option { + if let Some(container) = self.pop_container_back() { + let result = container.as_ref().key; + *self.iter_back_mut() = Some(container.into_iter().peekable()); + Some(result) + } else if self.iter_front_mut().is_some() { + let mut iter_none = None; + core::mem::swap(&mut iter_none, self.iter_front_mut()); + core::mem::swap(&mut iter_none, self.iter_back_mut()); + + if let Some(v) = self.iter_back_mut().as_mut().and_then(|i| i.peek().cloned()) { + let (key, _) = util::split(v); + Some(key) + } else { + None + } + } else { + None + } + } + } + + impl IterInternal for IntoIter { + type InnerIter = container::Iter<'static>; + type Container = Container; + type ContainerIterator = alloc::collections::vec_deque::IntoIter; + type IntoContainerIterator = alloc::collections::vec_deque::IntoIter; + + fn pop_container_front(&mut self) -> Option { + self.containers.pop_front() + } + + fn pop_container_back(&mut self) -> Option { + self.containers.pop_back() + } + + fn drain_containers_until(&mut self, index: usize) { + self.containers.drain(0..index); + } + + fn clear_containers(&mut self) { + self.containers.clear(); + } + + fn find_container(&self, key: u16) -> Option { + match self.containers.binary_search_by_key(&key, |container| container.key) { + Ok(index) | Err(index) if index < self.containers.len() => Some(index), + _ => None, + } + } + + fn iter_front_mut(&mut self) -> &mut Option> { + &mut self.iter_front + } + + fn iter_back_mut(&mut self) -> &mut Option> { + &mut self.iter_back + } + + fn dec_size_hint(&mut self) { + self.size_hint = self.size_hint.saturating_sub(1); + } + + fn decompose( + self, + ) -> ( + Option>, + Option>, + Self::IntoContainerIterator, + ) { + (self.iter_front, self.iter_back, self.containers.into_iter()) + } + } + + impl<'a> IterInternal for Iter<'a> { + type InnerIter = container::Iter<'a>; + type Container = &'a Container; + type ContainerIterator = slice::Iter<'a, Container>; + + type IntoContainerIterator = slice::Iter<'a, Container>; + + fn pop_container_front(&mut self) -> Option { + if let Some((first, rest)) = self.containers.split_first() { + self.containers = rest; + Some(first) + } else { + None + } + } + + fn pop_container_back(&mut self) -> Option { + if let Some((last, rest)) = self.containers.split_last() { + self.containers = rest; + Some(last) + } else { + None + } + } + + fn drain_containers_until(&mut self, index: usize) { + self.containers = &self.containers[index..] + } + + fn clear_containers(&mut self) { + self.containers = &[]; + } + + fn find_container(&self, key: u16) -> Option { + match self.containers.binary_search_by_key(&key, |container| container.key) { + Ok(index) | Err(index) if index < self.containers.len() => Some(index), + _ => None, + } + } + + fn iter_front_mut(&mut self) -> &mut Option> { + &mut self.iter_front + } + + fn iter_back_mut(&mut self) -> &mut Option> { + &mut self.iter_back + } + + fn dec_size_hint(&mut self) { + self.size_hint = self.size_hint.saturating_sub(1); + } + + fn decompose( + self, + ) -> ( + Option>, + Option>, + Self::IntoContainerIterator, + ) { + (self.iter_front, self.iter_back, self.containers.iter()) + } + } } -impl_advance_to!(Iter<'a>, AdvanceToIter<'a, slice::Iter<'a, Container>>, 'a); -impl_advance_to!(IntoIter, AdvanceToIter<'static, vec::IntoIter>); diff --git a/roaring/tests/iter_advance_to.rs b/roaring/tests/iter_advance_to.rs index 7aecc3d7..9ff3e87d 100644 --- a/roaring/tests/iter_advance_to.rs +++ b/roaring/tests/iter_advance_to.rs @@ -3,7 +3,8 @@ use roaring::RoaringBitmap; #[test] fn iter_basic() { let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); - let mut i = bm.iter().advance_to(10); + let mut i = bm.iter(); + i.advance_to(10); for n in 11..=14 { assert_eq!(i.next(), Some(n)) } @@ -13,14 +14,16 @@ fn iter_basic() { #[test] fn iter_advance_past_end() { let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); - let mut i = bm.iter().advance_to(15); + let mut i = bm.iter(); + i.advance_to(15); assert_eq!(i.next(), None); } #[test] fn iter_multi_container() { let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); - let mut i = bm.iter().advance_to(3); + let mut i = bm.iter(); + i.advance_to(3); assert_eq!(i.next(), Some(3)); assert_eq!(i.next(), Some(100000)); assert_eq!(i.next(), Some(100001)); @@ -30,13 +33,16 @@ fn iter_multi_container() { #[test] fn iter_empty() { let bm = RoaringBitmap::new(); - assert_eq!(bm.iter().advance_to(31337).next(), None) + let mut i = bm.iter(); + i.advance_to(31337); + assert_eq!(i.next(), None) } #[test] fn into_iter_basic() { let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); - let mut i = bm.into_iter().advance_to(10); + let mut i = bm.into_iter(); + i.advance_to(10); for n in 11..=14 { assert_eq!(i.next(), Some(n)) } @@ -46,7 +52,8 @@ fn into_iter_basic() { #[test] fn into_iter_multi_container() { let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); - let mut i = bm.into_iter().advance_to(3); + let mut i = bm.into_iter(); + i.advance_to(3); assert_eq!(i.next(), Some(3)); assert_eq!(i.next(), Some(100000)); assert_eq!(i.next(), Some(100001)); @@ -56,16 +63,9 @@ fn into_iter_multi_container() { #[test] fn into_iter_empty() { let bm = RoaringBitmap::new(); - assert_eq!(bm.into_iter().advance_to(31337).next(), None) -} - -#[test] -fn iter_from() { - let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); - let mut i = bm.iter_from(99999); - assert_eq!(i.next(), Some(100000)); - assert_eq!(i.next(), Some(100001)); - assert_eq!(i.next(), None); + let mut i = bm.into_iter(); + i.advance_to(31337); + assert_eq!(i.next(), None) } #[test] @@ -73,7 +73,7 @@ fn advance_to_with_tail_iter() { let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); let mut i = bm.iter(); i.next_back(); - let mut i = i.advance_to(100000); + i.advance_to(100000); assert_eq!(i.next(), Some(100000)); assert_eq!(i.next(), None); } From feca297b0dc706077eb5a4130adb6388434578d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Thu, 26 Sep 2024 09:49:23 +0200 Subject: [PATCH 11/15] Fix performance regression in iterators --- roaring/src/bitmap/container.rs | 5 + roaring/src/bitmap/iter.rs | 205 ++++++++++++----------- roaring/src/bitmap/store/bitmap_store.rs | 22 +++ roaring/src/bitmap/store/mod.rs | 10 ++ 4 files changed, 140 insertions(+), 102 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 391940fb..05145a6d 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -322,6 +322,11 @@ impl<'a> Iter<'a> { pub fn empty() -> Self { Self { key: 0, inner: store::Iter::Vec(vec![].into_iter()) } } + + #[inline] + pub fn peek(&mut self) -> Option { + self.inner.peek().map(|i| util::join(self.key, i)) + } } impl AsRef for Container { diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 856ef3a1..b1b8363f 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -6,20 +6,28 @@ use crate::bitmap::container; use alloc::vec::Vec; use alloc::collections::vec_deque::VecDeque; -use core::iter::Peekable; use iter_inner::IterInternal; /// An iterator for `RoaringBitmap`. pub struct Iter<'a> { containers: &'a [Container], - iter_front: Option>>, - iter_back: Option>>, + iter_front: container::Iter<'a>, + iter_back: Option>, size_hint: u64, } impl<'a> Iter<'a> { fn new(containers: &'a [Container]) -> Self { let size_hint = containers.iter().map(|c| c.len()).sum(); - Self { containers, iter_front: None, iter_back: None, size_hint } + if let Some((first, rest)) = containers.split_first() { + Self { containers: rest, iter_front: first.into_iter(), iter_back: None, size_hint } + } else { + Self { + containers: &[], + iter_front: container::Iter::empty(), + iter_back: None, + size_hint, + } + } } /// Advance the iterator to the first position where the item has a value >= `n` @@ -84,18 +92,23 @@ impl<'a> ExactSizeIterator for Iter<'a> { /// An iterator for `RoaringBitmap`. pub struct IntoIter { containers: VecDeque, - iter_front: Option>>, - iter_back: Option>>, + iter_front: container::Iter<'static>, + iter_back: Option>, size_hint: u64, } impl IntoIter { fn new(containers: Vec) -> Self { let size_hint = containers.iter().map(|c| c.len()).sum(); - Self { - containers: VecDeque::from(containers), - iter_front: None, - iter_back: None, - size_hint, + let mut containers = VecDeque::from(containers); + if let Some(first) = containers.pop_front() { + Self { containers, iter_front: first.into_iter(), iter_back: None, size_hint } + } else { + Self { + containers: Default::default(), + iter_front: container::Iter::empty(), + iter_back: None, + size_hint, + } } } @@ -310,14 +323,11 @@ impl RoaringBitmap { mod iter_inner { use crate::bitmap::container::Container; use crate::bitmap::{container, util, IntoIter, Iter}; - use core::iter::Peekable; use core::slice; // To get rid of clippy complex type warning - pub(super) type DecomposeInnerIter = Option>; - pub(super) trait IterInternal { - type InnerIter: Iterator + DoubleEndedIterator; - type Container: IntoIterator + AsRef; + pub(super) trait IterInternal<'a> { + type Container: IntoIterator, Item = u32> + AsRef; type ContainerIterator: Iterator + DoubleEndedIterator; type IntoContainerIterator: IntoIterator; @@ -331,27 +341,23 @@ mod iter_inner { fn find_container(&self, key: u16) -> Option; - fn iter_front_mut(&mut self) -> &mut Option>; - fn iter_back_mut(&mut self) -> &mut Option>; + fn iter_front_mut(&mut self) -> &mut container::Iter<'a>; + fn iter_back_mut(&mut self) -> &mut Option>; fn dec_size_hint(&mut self); + fn empty_inner_iter() -> container::Iter<'static>; + fn decompose( self, - ) -> ( - DecomposeInnerIter, - DecomposeInnerIter, - Self::IntoContainerIterator, - ); + ) -> (container::Iter<'a>, Option>, Self::IntoContainerIterator); #[inline] fn next_inner(&mut self) -> Option { self.dec_size_hint(); loop { - if let Some(iter) = self.iter_front_mut() { - if let item @ Some(_) = iter.next() { - return item; - } + if let item @ Some(_) = self.iter_front_mut().next() { + return item; } if self.advance_container().is_some() { continue; @@ -380,8 +386,8 @@ mod iter_inner { fn advance_to_inner(&mut self, n: u32) { let (key, _) = util::split(n); - fn advance_iter(iter: &mut Peekable>, n: u32) -> Option { - while let Some(next) = iter.peek().cloned() { + fn advance_iter(iter: &mut container::Iter<'_>, n: u32) -> Option { + while let Some(next) = iter.peek() { if next < n { iter.next(); } else { @@ -393,26 +399,25 @@ mod iter_inner { if let Some(index) = self.find_container(key) { self.drain_containers_until(index); let container = self.pop_container_front().expect("bug!"); - let mut iter = container.into_iter().peekable(); + let mut iter = container.into_iter(); advance_iter(&mut iter, n); - *self.iter_front_mut() = Some(iter); + *self.iter_front_mut() = iter; } else { // there are no containers with given key. Look in iter_front and iter_back. - self.clear_containers(); - if let Some(iter_front) = self.iter_front_mut() { - if iter_front.peek().cloned().map(|n| util::split(n).0) == Some(key) { - advance_iter(iter_front, n); - return; - } + let iter_front = self.iter_front_mut(); + if iter_front.peek().map(|n| util::split(n).0) == Some(key) { + advance_iter(iter_front, n); + return; } if let Some(iter_back) = self.iter_back_mut() { - if iter_back.peek().cloned().map(|n| util::split(n).0) == Some(key) { + if iter_back.peek().map(|n| util::split(n).0) == Some(key) { advance_iter(iter_back, n); - *self.iter_front_mut() = None; + *self.iter_front_mut() = Self::empty_inner_iter(); return; } } - *self.iter_front_mut() = None; + self.clear_containers(); + *self.iter_front_mut() = Self::empty_inner_iter(); *self.iter_back_mut() = None; } } @@ -424,53 +429,38 @@ mod iter_inner { Self: Sized, { let (iter_front, iter_back, containers) = self.decompose(); - match (iter_front, iter_back) { - (Some(iter_front), Some(iter_back)) => iter_front - .chain(containers.into_iter().flatten()) - .chain(iter_back) - .fold(init, f), - (Some(iter_front), None) => { - iter_front.chain(containers.into_iter().flatten()).fold(init, f) - } - (None, Some(iter_back)) => { - containers.into_iter().flatten().chain(iter_back).fold(init, f) - } - (None, None) => containers.into_iter().flatten().fold(init, f), + if let Some(iter_back) = iter_back { + iter_front.chain(containers.into_iter().flatten()).chain(iter_back).fold(init, f) + } else { + iter_front.chain(containers.into_iter().flatten()).fold(init, f) } } + #[inline] fn rfold_inner(self, init: Acc, f: Fold) -> Acc where Fold: FnMut(Acc, u32) -> Acc, Self: Sized, { let (iter_front, iter_back, containers) = self.decompose(); - match (iter_front, iter_back) { - (Some(iter_front), Some(iter_back)) => iter_front - .chain(containers.into_iter().flatten()) - .chain(iter_back) - .rfold(init, f), - (Some(iter_front), None) => { - iter_front.chain(containers.into_iter().flatten()).rfold(init, f) - } - (None, Some(iter_back)) => { - containers.into_iter().flatten().chain(iter_back).rfold(init, f) - } - (None, None) => containers.into_iter().flatten().rfold(init, f), + if let Some(iter_back) = iter_back { + iter_front.chain(containers.into_iter().flatten()).chain(iter_back).rfold(init, f) + } else { + iter_front.chain(containers.into_iter().flatten()).rfold(init, f) } } + #[inline] fn advance_container(&mut self) -> Option { if let Some(container) = self.pop_container_front() { let result = container.as_ref().key; - *self.iter_front_mut() = Some(container.into_iter().peekable()); + *self.iter_front_mut() = container.into_iter(); Some(result) } else if self.iter_back_mut().is_some() { - let mut iter_none = None; - core::mem::swap(&mut iter_none, self.iter_back_mut()); - core::mem::swap(&mut iter_none, self.iter_front_mut()); - *self.iter_back_mut() = None; - if let Some(v) = self.iter_front_mut().as_mut().and_then(|i| i.peek().cloned()) { + let mut iter_back = None; + core::mem::swap(&mut iter_back, self.iter_back_mut()); + *self.iter_front_mut() = iter_back.expect("bug!"); + if let Some(v) = self.iter_front_mut().peek() { let (key, _) = util::split(v); Some(key) } else { @@ -481,17 +471,18 @@ mod iter_inner { } } + #[inline] fn advance_container_back(&mut self) -> Option { if let Some(container) = self.pop_container_back() { let result = container.as_ref().key; - *self.iter_back_mut() = Some(container.into_iter().peekable()); + *self.iter_back_mut() = Some(container.into_iter()); Some(result) - } else if self.iter_front_mut().is_some() { - let mut iter_none = None; - core::mem::swap(&mut iter_none, self.iter_front_mut()); - core::mem::swap(&mut iter_none, self.iter_back_mut()); + } else if self.iter_front_mut().peek().is_some() { + let mut iter_front = Self::empty_inner_iter(); + core::mem::swap(&mut iter_front, self.iter_front_mut()); + *self.iter_back_mut() = Some(iter_front); - if let Some(v) = self.iter_back_mut().as_mut().and_then(|i| i.peek().cloned()) { + if let Some(v) = self.iter_back_mut().as_mut().and_then(|i| i.peek()) { let (key, _) = util::split(v); Some(key) } else { @@ -503,65 +494,70 @@ mod iter_inner { } } - impl IterInternal for IntoIter { - type InnerIter = container::Iter<'static>; + impl IterInternal<'static> for IntoIter { type Container = Container; type ContainerIterator = alloc::collections::vec_deque::IntoIter; type IntoContainerIterator = alloc::collections::vec_deque::IntoIter; + #[inline] fn pop_container_front(&mut self) -> Option { self.containers.pop_front() } + #[inline] fn pop_container_back(&mut self) -> Option { self.containers.pop_back() } + #[inline] fn drain_containers_until(&mut self, index: usize) { self.containers.drain(0..index); } + #[inline] fn clear_containers(&mut self) { self.containers.clear(); } + #[inline] fn find_container(&self, key: u16) -> Option { - match self.containers.binary_search_by_key(&key, |container| container.key) { - Ok(index) | Err(index) if index < self.containers.len() => Some(index), - _ => None, - } + self.containers.binary_search_by_key(&key, |container| container.key).ok() } - fn iter_front_mut(&mut self) -> &mut Option> { + #[inline] + fn iter_front_mut(&mut self) -> &mut container::Iter<'static> { &mut self.iter_front } - fn iter_back_mut(&mut self) -> &mut Option> { + #[inline] + fn iter_back_mut(&mut self) -> &mut Option> { &mut self.iter_back } + #[inline] fn dec_size_hint(&mut self) { self.size_hint = self.size_hint.saturating_sub(1); } + fn empty_inner_iter() -> container::Iter<'static> { + container::Iter::empty() + } + fn decompose( self, - ) -> ( - Option>, - Option>, - Self::IntoContainerIterator, - ) { + ) -> (container::Iter<'static>, Option>, Self::IntoContainerIterator) + { (self.iter_front, self.iter_back, self.containers.into_iter()) } } - impl<'a> IterInternal for Iter<'a> { - type InnerIter = container::Iter<'a>; + impl<'a> IterInternal<'a> for Iter<'a> { type Container = &'a Container; type ContainerIterator = slice::Iter<'a, Container>; type IntoContainerIterator = slice::Iter<'a, Container>; + #[inline] fn pop_container_front(&mut self) -> Option { if let Some((first, rest)) = self.containers.split_first() { self.containers = rest; @@ -571,6 +567,7 @@ mod iter_inner { } } + #[inline] fn pop_container_back(&mut self) -> Option { if let Some((last, rest)) = self.containers.split_last() { self.containers = rest; @@ -580,40 +577,44 @@ mod iter_inner { } } + #[inline] fn drain_containers_until(&mut self, index: usize) { self.containers = &self.containers[index..] } + #[inline] fn clear_containers(&mut self) { self.containers = &[]; } + #[inline] fn find_container(&self, key: u16) -> Option { - match self.containers.binary_search_by_key(&key, |container| container.key) { - Ok(index) | Err(index) if index < self.containers.len() => Some(index), - _ => None, - } + self.containers.binary_search_by_key(&key, |container| container.key).ok() } - fn iter_front_mut(&mut self) -> &mut Option> { + #[inline] + fn iter_front_mut(&mut self) -> &mut container::Iter<'a> { &mut self.iter_front } - fn iter_back_mut(&mut self) -> &mut Option> { + #[inline] + fn iter_back_mut(&mut self) -> &mut Option> { &mut self.iter_back } + #[inline] fn dec_size_hint(&mut self) { self.size_hint = self.size_hint.saturating_sub(1); } + fn empty_inner_iter() -> container::Iter<'static> { + container::Iter::empty() + } + fn decompose( self, - ) -> ( - Option>, - Option>, - Self::IntoContainerIterator, - ) { + ) -> (container::Iter<'a>, Option>, Self::IntoContainerIterator) + { (self.iter_front, self.iter_back, self.containers.iter()) } } diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index f349a2aa..b74af8a7 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -421,6 +421,28 @@ impl> BitmapIter { bits, } } + + pub fn peek(&self) -> Option { + let mut key = self.key; + let mut value = self.value; + loop { + if value == 0 { + key += 1; + let cmp = self.key.cmp(&self.key_back); + // Match arms can be reordered, this ordering is perf sensitive + value = if cmp == Ordering::Less { + unsafe { *self.bits.borrow().get_unchecked(self.key) } + } else if cmp == Ordering::Equal { + self.value_back + } else { + return None; + }; + continue; + } + let index = value.trailing_zeros() as usize; + return Some((64 * key + index) as u16); + } + } } impl> Iterator for BitmapIter { diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index bb0d5822..c427d9bd 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -497,6 +497,16 @@ impl PartialEq for Store { } } +impl Iter<'_> { + pub fn peek(&mut self) -> Option { + match self { + Iter::Array(inner) => inner.as_slice().iter().next().cloned(), + Iter::Vec(inner) => inner.as_slice().iter().next().cloned(), + Iter::BitmapBorrowed(inner) => inner.peek(), + Iter::BitmapOwned(inner) => inner.peek(), + } + } +} impl<'a> Iterator for Iter<'a> { type Item = u16; From de80748e0bdf53ad8070e19b3b15bf3e9867cc7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Wed, 2 Oct 2024 08:57:23 +0200 Subject: [PATCH 12/15] Remove dangling comment --- roaring/src/bitmap/iter.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index b1b8363f..795a07d6 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -325,7 +325,6 @@ mod iter_inner { use crate::bitmap::{container, util, IntoIter, Iter}; use core::slice; - // To get rid of clippy complex type warning pub(super) trait IterInternal<'a> { type Container: IntoIterator, Item = u32> + AsRef; From 32a3695eb00865ee476ff153538708790a66ceff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Wed, 2 Oct 2024 08:59:16 +0200 Subject: [PATCH 13/15] Simplify store::Iter::peek --- roaring/src/bitmap/store/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index c427d9bd..4d22a2ff 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -500,8 +500,8 @@ impl PartialEq for Store { impl Iter<'_> { pub fn peek(&mut self) -> Option { match self { - Iter::Array(inner) => inner.as_slice().iter().next().cloned(), - Iter::Vec(inner) => inner.as_slice().iter().next().cloned(), + Iter::Array(inner) => inner.as_slice().first().cloned(), + Iter::Vec(inner) => inner.as_slice().first().cloned(), Iter::BitmapBorrowed(inner) => inner.peek(), Iter::BitmapOwned(inner) => inner.peek(), } From 522e945a1addb4dc18fcc19377e09ff76041b4eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Wed, 2 Oct 2024 14:52:06 +0200 Subject: [PATCH 14/15] Fix size_hint() when using advance_to --- roaring/src/bitmap/container.rs | 2 +- roaring/src/bitmap/iter.rs | 76 +++++++++++++++++++++++--------- roaring/src/bitmap/store/mod.rs | 63 +++++++++++++++++--------- roaring/tests/iter_advance_to.rs | 17 ++++++- 4 files changed, 114 insertions(+), 44 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 05145a6d..fb95b764 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -320,7 +320,7 @@ impl fmt::Debug for Container { impl<'a> Iter<'a> { pub fn empty() -> Self { - Self { key: 0, inner: store::Iter::Vec(vec![].into_iter()) } + Self { key: 0, inner: store::Iter::empty() } } #[inline] diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 795a07d6..957fd791 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -343,7 +343,8 @@ mod iter_inner { fn iter_front_mut(&mut self) -> &mut container::Iter<'a>; fn iter_back_mut(&mut self) -> &mut Option>; - fn dec_size_hint(&mut self); + fn dec_size_hint(&mut self, n: u64); + fn set_size_hint(&mut self, n: u64); fn empty_inner_iter() -> container::Iter<'static>; @@ -353,7 +354,7 @@ mod iter_inner { #[inline] fn next_inner(&mut self) -> Option { - self.dec_size_hint(); + self.dec_size_hint(1); loop { if let item @ Some(_) = self.iter_front_mut().next() { return item; @@ -368,7 +369,7 @@ mod iter_inner { #[inline] fn next_back_inner(&mut self) -> Option { - self.dec_size_hint(); + self.dec_size_hint(1); loop { if let Some(iter) = self.iter_back_mut() { if let item @ Some(_) = iter.next_back() { @@ -384,40 +385,50 @@ mod iter_inner { } fn advance_to_inner(&mut self, n: u32) { - let (key, _) = util::split(n); - fn advance_iter(iter: &mut container::Iter<'_>, n: u32) -> Option { + fn advance_iter(iter: &mut container::Iter<'_>, n: u32) -> u64 { + let mut items_skipped = 0; while let Some(next) = iter.peek() { if next < n { iter.next(); + items_skipped += 1; } else { - return Some(next); + return items_skipped; } } - None + items_skipped } + let (key, _) = util::split(n); if let Some(index) = self.find_container(key) { self.drain_containers_until(index); let container = self.pop_container_front().expect("bug!"); let mut iter = container.into_iter(); - advance_iter(&mut iter, n); + self.dec_size_hint(advance_iter(&mut iter, n)); *self.iter_front_mut() = iter; } else { // there are no containers with given key. Look in iter_front and iter_back. - let iter_front = self.iter_front_mut(); - if iter_front.peek().map(|n| util::split(n).0) == Some(key) { - advance_iter(iter_front, n); + if self.iter_front_mut().peek().map(|n| util::split(n).0) == Some(key) { + let skipped = advance_iter(self.iter_front_mut(), n); + self.dec_size_hint(skipped); return; } - if let Some(iter_back) = self.iter_back_mut() { - if iter_back.peek().map(|n| util::split(n).0) == Some(key) { - advance_iter(iter_back, n); - *self.iter_front_mut() = Self::empty_inner_iter(); - return; - } + if self + .iter_back_mut() + .as_mut() + .and_then(|i| i.peek().filter(|n| util::split(*n).0 == key)) + .is_some() + { + let mut iter_back = None; + std::mem::swap(&mut iter_back, self.iter_back_mut()); + let mut iter_back = iter_back.expect("bug!"); + advance_iter(&mut iter_back, n); + self.set_size_hint(iter_back.size_hint().0 as u64); + *self.iter_front_mut() = iter_back; + return; } self.clear_containers(); *self.iter_front_mut() = Self::empty_inner_iter(); *self.iter_back_mut() = None; + self.set_size_hint(0); } } @@ -452,6 +463,8 @@ mod iter_inner { #[inline] fn advance_container(&mut self) -> Option { if let Some(container) = self.pop_container_front() { + let front_size_hint = self.iter_front_mut().size_hint().0 as u64; + self.dec_size_hint(front_size_hint); let result = container.as_ref().key; *self.iter_front_mut() = container.into_iter(); Some(result) @@ -459,6 +472,8 @@ mod iter_inner { let mut iter_back = None; core::mem::swap(&mut iter_back, self.iter_back_mut()); *self.iter_front_mut() = iter_back.expect("bug!"); + let size_hint = self.iter_front_mut().size_hint().0 as u64; + self.set_size_hint(size_hint); if let Some(v) = self.iter_front_mut().peek() { let (key, _) = util::split(v); Some(key) @@ -510,11 +525,15 @@ mod iter_inner { #[inline] fn drain_containers_until(&mut self, index: usize) { - self.containers.drain(0..index); + let removed_elements = + self.containers.drain(0..index).map(|container| container.len()).sum(); + self.size_hint = self.size_hint.saturating_sub(removed_elements); } #[inline] fn clear_containers(&mut self) { + let removed_elements = self.containers.iter().map(|container| container.len()).sum(); + self.size_hint = self.size_hint.saturating_sub(removed_elements); self.containers.clear(); } @@ -534,8 +553,12 @@ mod iter_inner { } #[inline] - fn dec_size_hint(&mut self) { - self.size_hint = self.size_hint.saturating_sub(1); + fn dec_size_hint(&mut self, n: u64) { + self.size_hint = self.size_hint.saturating_sub(n); + } + + fn set_size_hint(&mut self, n: u64) { + self.size_hint = n; } fn empty_inner_iter() -> container::Iter<'static> { @@ -578,11 +601,16 @@ mod iter_inner { #[inline] fn drain_containers_until(&mut self, index: usize) { + let removed_elements = + self.containers[..index].iter().map(|container| container.len()).sum(); + self.size_hint = self.size_hint.saturating_sub(removed_elements); self.containers = &self.containers[index..] } #[inline] fn clear_containers(&mut self) { + let removed_elements = self.containers.iter().map(|container| container.len()).sum(); + self.size_hint = self.size_hint.saturating_sub(removed_elements); self.containers = &[]; } @@ -602,8 +630,12 @@ mod iter_inner { } #[inline] - fn dec_size_hint(&mut self) { - self.size_hint = self.size_hint.saturating_sub(1); + fn dec_size_hint(&mut self, n: u64) { + self.size_hint = self.size_hint.saturating_sub(n); + } + + fn set_size_hint(&mut self, n: u64) { + self.size_hint = n; } fn empty_inner_iter() -> container::Iter<'static> { diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 4d22a2ff..239e4c4a 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -25,13 +25,18 @@ pub enum Store { Bitmap(BitmapStore), } -pub enum Iter<'a> { +enum IterInner<'a> { Array(slice::Iter<'a, u16>), Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), BitmapOwned(BitmapIter>), } +pub struct Iter<'a> { + inner: IterInner<'a>, + size_hint: u64, +} + impl Store { pub fn new() -> Store { Store::Array(ArrayStore::new()) @@ -467,8 +472,10 @@ impl<'a> IntoIterator for &'a Store { type IntoIter = Iter<'a>; fn into_iter(self) -> Iter<'a> { match self { - Array(vec) => Iter::Array(vec.iter()), - Bitmap(bits) => Iter::BitmapBorrowed(bits.iter()), + Array(vec) => Iter { inner: IterInner::Array(vec.iter()), size_hint: vec.len() }, + Bitmap(bits) => { + Iter { inner: IterInner::BitmapBorrowed(bits.iter()), size_hint: bits.len() } + } } } } @@ -478,8 +485,14 @@ impl IntoIterator for Store { type IntoIter = Iter<'static>; fn into_iter(self) -> Iter<'static> { match self { - Array(vec) => Iter::Vec(vec.into_iter()), - Bitmap(bits) => Iter::BitmapOwned(bits.into_iter()), + Array(vec) => { + let size_hint = vec.len(); + Iter { inner: IterInner::Vec(vec.into_iter()), size_hint } + } + Bitmap(bits) => { + let size_hint = bits.len(); + Iter { inner: IterInner::BitmapOwned(bits.into_iter()), size_hint } + } } } } @@ -499,34 +512,44 @@ impl PartialEq for Store { impl Iter<'_> { pub fn peek(&mut self) -> Option { - match self { - Iter::Array(inner) => inner.as_slice().first().cloned(), - Iter::Vec(inner) => inner.as_slice().first().cloned(), - Iter::BitmapBorrowed(inner) => inner.peek(), - Iter::BitmapOwned(inner) => inner.peek(), + match &self.inner { + IterInner::Array(inner) => inner.as_slice().first().cloned(), + IterInner::Vec(inner) => inner.as_slice().first().cloned(), + IterInner::BitmapBorrowed(inner) => inner.peek(), + IterInner::BitmapOwned(inner) => inner.peek(), } } + + pub fn empty() -> Self { + Self { inner: IterInner::Array([].iter()), size_hint: 0 } + } } impl<'a> Iterator for Iter<'a> { type Item = u16; fn next(&mut self) -> Option { - match self { - Iter::Array(inner) => inner.next().cloned(), - Iter::Vec(inner) => inner.next(), - Iter::BitmapBorrowed(inner) => inner.next(), - Iter::BitmapOwned(inner) => inner.next(), + self.size_hint = self.size_hint.saturating_sub(1); + match &mut self.inner { + IterInner::Array(inner) => inner.next().cloned(), + IterInner::Vec(inner) => inner.next(), + IterInner::BitmapBorrowed(inner) => inner.next(), + IterInner::BitmapOwned(inner) => inner.next(), } } + + fn size_hint(&self) -> (usize, Option) { + (self.size_hint as usize, Some(self.size_hint as usize)) + } } impl DoubleEndedIterator for Iter<'_> { fn next_back(&mut self) -> Option { - match self { - Iter::Array(inner) => inner.next_back().cloned(), - Iter::Vec(inner) => inner.next_back(), - Iter::BitmapBorrowed(inner) => inner.next_back(), - Iter::BitmapOwned(inner) => inner.next_back(), + self.size_hint = self.size_hint.saturating_sub(1); + match &mut self.inner { + IterInner::Array(inner) => inner.next_back().cloned(), + IterInner::Vec(inner) => inner.next_back(), + IterInner::BitmapBorrowed(inner) => inner.next_back(), + IterInner::BitmapOwned(inner) => inner.next_back(), } } } diff --git a/roaring/tests/iter_advance_to.rs b/roaring/tests/iter_advance_to.rs index 9ff3e87d..07e55a74 100644 --- a/roaring/tests/iter_advance_to.rs +++ b/roaring/tests/iter_advance_to.rs @@ -17,6 +17,7 @@ fn iter_advance_past_end() { let mut i = bm.iter(); i.advance_to(15); assert_eq!(i.next(), None); + assert_eq!(i.size_hint(), (0, Some(0))); } #[test] @@ -24,10 +25,15 @@ fn iter_multi_container() { let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); let mut i = bm.iter(); i.advance_to(3); + assert_eq!(i.size_hint(), (3, Some(3))); assert_eq!(i.next(), Some(3)); + assert_eq!(i.size_hint(), (2, Some(2))); assert_eq!(i.next(), Some(100000)); + assert_eq!(i.size_hint(), (1, Some(1))); assert_eq!(i.next(), Some(100001)); + assert_eq!(i.size_hint(), (0, Some(0))); assert_eq!(i.next(), None); + assert_eq!(i.size_hint(), (0, Some(0))); } #[test] @@ -35,6 +41,7 @@ fn iter_empty() { let bm = RoaringBitmap::new(); let mut i = bm.iter(); i.advance_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); assert_eq!(i.next(), None) } @@ -43,8 +50,12 @@ fn into_iter_basic() { let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); let mut i = bm.into_iter(); i.advance_to(10); + let mut expected_size_hint = 4; + assert_eq!(i.size_hint(), (expected_size_hint, Some(expected_size_hint))); for n in 11..=14 { - assert_eq!(i.next(), Some(n)) + assert_eq!(i.next(), Some(n)); + expected_size_hint -= 1; + assert_eq!(i.size_hint(), (expected_size_hint, Some(expected_size_hint))); } assert_eq!(i.next(), None); } @@ -54,6 +65,7 @@ fn into_iter_multi_container() { let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); let mut i = bm.into_iter(); i.advance_to(3); + assert_eq!(i.size_hint(), (3, Some(3))); assert_eq!(i.next(), Some(3)); assert_eq!(i.next(), Some(100000)); assert_eq!(i.next(), Some(100001)); @@ -65,6 +77,7 @@ fn into_iter_empty() { let bm = RoaringBitmap::new(); let mut i = bm.into_iter(); i.advance_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); assert_eq!(i.next(), None) } @@ -74,6 +87,8 @@ fn advance_to_with_tail_iter() { let mut i = bm.iter(); i.next_back(); i.advance_to(100000); + assert_eq!(i.size_hint(), (1, Some(1))); assert_eq!(i.next(), Some(100000)); + assert_eq!(i.size_hint(), (0, Some(0))); assert_eq!(i.next(), None); } From 3c86fd9c2ca8bf978ebb5a5ac793fc76858bda8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Schj=C3=B8lberg?= Date: Wed, 9 Oct 2024 12:06:34 +0200 Subject: [PATCH 15/15] Fix nightly clippy errors --- roaring/src/bitmap/container.rs | 4 ++-- roaring/src/bitmap/iter.rs | 6 +++--- roaring/src/bitmap/store/mod.rs | 2 +- roaring/src/treemap/iter.rs | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index fb95b764..9b11cd57 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -295,7 +295,7 @@ impl IntoIterator for Container { } } -impl<'a> Iterator for Iter<'a> { +impl Iterator for Iter<'_> { type Item = u32; fn next(&mut self) -> Option { self.inner.next().map(|i| util::join(self.key, i)) @@ -318,7 +318,7 @@ impl fmt::Debug for Container { } } -impl<'a> Iter<'a> { +impl Iter<'_> { pub fn empty() -> Self { Self { key: 0, inner: store::Iter::empty() } } diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 957fd791..678e3a06 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -49,7 +49,7 @@ impl<'a> Iter<'a> { self.advance_to_inner(n); } } -impl<'a> Iterator for Iter<'a> { +impl Iterator for Iter<'_> { type Item = u32; fn next(&mut self) -> Option { self.next_inner() @@ -70,7 +70,7 @@ impl<'a> Iterator for Iter<'a> { self.fold_inner(init, f) } } -impl<'a> DoubleEndedIterator for Iter<'a> { +impl DoubleEndedIterator for Iter<'_> { fn next_back(&mut self) -> Option { self.next_back_inner() } @@ -84,7 +84,7 @@ impl<'a> DoubleEndedIterator for Iter<'a> { } } #[cfg(target_pointer_width = "64")] -impl<'a> ExactSizeIterator for Iter<'a> { +impl ExactSizeIterator for Iter<'_> { fn len(&self) -> usize { self.size_hint as usize } diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 239e4c4a..b0eef942 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -524,7 +524,7 @@ impl Iter<'_> { Self { inner: IterInner::Array([].iter()), size_hint: 0 } } } -impl<'a> Iterator for Iter<'a> { +impl Iterator for Iter<'_> { type Item = u16; fn next(&mut self) -> Option { diff --git a/roaring/src/treemap/iter.rs b/roaring/src/treemap/iter.rs index 96ccb00d..f8094a37 100644 --- a/roaring/src/treemap/iter.rs +++ b/roaring/src/treemap/iter.rs @@ -11,7 +11,7 @@ struct To64Iter<'a> { inner: Iter32<'a>, } -impl<'a> Iterator for To64Iter<'a> { +impl Iterator for To64Iter<'_> { type Item = u64; fn next(&mut self) -> Option { self.inner.next().map(|n| util::join(self.hi, n)) @@ -109,7 +109,7 @@ pub struct IntoIter { size_hint: u64, } -impl<'a> Iter<'a> { +impl Iter<'_> { fn new(map: &BTreeMap) -> Iter { let size_hint: u64 = map.iter().map(|(_, r)| r.len()).sum(); let i = map.iter().flat_map(to64iter as _); @@ -125,7 +125,7 @@ impl IntoIter { } } -impl<'a> Iterator for Iter<'a> { +impl Iterator for Iter<'_> { type Item = u64; fn next(&mut self) -> Option {